Merge branch 'master' into dep-fixes

This commit is contained in:
Elijah Potter
2025-02-17 13:33:29 -07:00
28 changed files with 1369 additions and 1078 deletions

12
Cargo.lock generated
View File

@@ -235,9 +235,9 @@ dependencies = [
[[package]]
name = "clap"
version = "4.5.28"
version = "4.5.29"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3e77c3243bd94243c03672cb5154667347c457ca271254724f9f393aee1c05ff"
checksum = "8acebd8ad879283633b343856142139f2da2317c96b05b4dd6181c61e2480184"
dependencies = [
"clap_builder",
"clap_derive",
@@ -245,9 +245,9 @@ dependencies = [
[[package]]
name = "clap_builder"
version = "4.5.27"
version = "4.5.29"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1b26884eb4b57140e4d2d93652abfa49498b938b3c9179f9fc487b0acc3edad7"
checksum = "f6ba32cbda51c7e1dfd49acc1457ba1a7dec5b64fe360e828acb13ca8dc9c2f9"
dependencies = [
"anstream",
"anstyle",
@@ -1564,9 +1564,9 @@ dependencies = [
[[package]]
name = "smallvec"
version = "1.13.2"
version = "1.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67"
checksum = "7fcf8323ef1faaee30a44a340193b1ac6814fd9b7b4e88e9d4519a3e4abe1cfd"
dependencies = [
"serde",
]

View File

@@ -9,7 +9,7 @@ repository = "https://github.com/automattic/harper"
[dependencies]
anyhow = "1.0.95"
ariadne = "0.4.1"
clap = { version = "4.5.28", features = ["derive"] }
clap = { version = "4.5.29", features = ["derive"] }
harper-literate-haskell = { path = "../harper-literate-haskell", version = "0.21.0" }
harper-core = { path = "../harper-core", version = "0.21.0" }
harper-comments = { path = "../harper-comments", version = "0.21.0" }

View File

@@ -19,7 +19,7 @@ paste = "1.0.14"
pulldown-cmark = "0.12.2"
serde = { version = "1.0.214", features = ["derive"] }
serde_json = "1.0.138"
smallvec = { version = "1.13.2", features = ["serde"] }
smallvec = { version = "1.14.0", features = ["serde"] }
thiserror = "2.0.11"
unicode-blocks = "0.1.9"
unicode-script = "0.5.7"

View File

@@ -8570,7 +8570,6 @@ Ryan/2M
Rydberg/2M
Ryder/2M
Ryukyu/2M
S/152MN
SA/5124
SAC/12
SALT/2M
@@ -37361,7 +37360,6 @@ pox/14MS
pp/1
ppm/1
ppr
pr/5
practicability/1M
practicably/
practical/51SMY
@@ -49928,3 +49926,19 @@ backplane/SM
cyberattack/SM
RTX/1SM
PDP/1SM
Jetpack/2M
learnt/4
WooCommerce/2M
Tumblr/2M
Akismet/2M
Simplenote/2M
Longreads/2M
VaultPress/2M
Gravatar/2M
Crowdsignal/2M
Mullenweg/M
journaling/SM3
Polldaddy/2M
P2/M
VideoPress/2M
stylization/SM

View File

@@ -129,7 +129,10 @@ impl Document {
for token in self.tokens.iter_mut() {
if let TokenKind::Word(meta) = &mut token.kind {
let word_source = token.span.get_content(&self.source);
let found_meta = dictionary.get_word_metadata(word_source);
let found_meta = dictionary
.get_correct_capitalization_of(word_source)
.map(|canonical_caps| dictionary.get_word_metadata(canonical_caps))
.unwrap_or_default();
*meta = meta.or(&found_meta);
}
}
@@ -535,6 +538,7 @@ macro_rules! create_fns_on_doc {
impl TokenStringExt for Document {
create_fns_on_doc!(word);
create_fns_on_doc!(hostname);
create_fns_on_doc!(word_like);
create_fns_on_doc!(conjunction);
create_fns_on_doc!(space);

View File

@@ -58,6 +58,7 @@ impl PatternLinter for GeneralCompoundNouns {
fn match_to_lint(&self, matched_tokens: &[Token], source: &[char]) -> Lint {
let span = matched_tokens.span().unwrap();
let orig = span.get_content(source);
// If the pattern matched, this will not return `None`.
let word = self
.split_pattern
@@ -67,7 +68,7 @@ impl PatternLinter for GeneralCompoundNouns {
Lint {
span,
lint_kind: LintKind::Spelling,
suggestions: vec![Suggestion::ReplaceWith(word.to_vec())],
suggestions: vec![Suggestion::replace_with_match_case(word.to_vec(), orig)],
message: format!(
"Did you mean the closed compound noun “{}”?",
word.to_string()

View File

@@ -226,7 +226,7 @@ mod tests {
#[test]
fn web_socket() {
let test_sentence = "Real-time updates are sent via a web socket.";
let expected = "Real-time updates are sent via a WebSocket.";
let expected = "Real-time updates are sent via a websocket.";
assert_suggestion_result(test_sentence, CompoundNouns::default(), expected);
}

View File

@@ -47,7 +47,7 @@ use super::phrase_corrections::MutePoint;
use super::phrase_corrections::StateOfTheArt;
use super::phrase_corrections::WantBe;
use super::phrase_corrections::{
AndAlike, BadRap, BatedBreath, BeckAndCall, ChangeTack, EnMasse, HumanLife, HungerPang,
AndTheLike, BadRap, BatedBreath, BeckAndCall, ChangeTack, EnMasse, HumanLife, HungerPang,
LetAlone, LoAndBehold, NeedHelp, NoLonger, OfCourse, SneakingSuspicion, SpecialAttention,
SupposedTo, ThanOthers, ThatChallenged, TurnItOff,
};
@@ -55,6 +55,10 @@ use super::pique_interest::PiqueInterest;
use super::plural_conjugate::PluralConjugate;
use super::possessive_your::PossessiveYour;
use super::pronoun_contraction::PronounContraction;
use super::proper_noun_capitalization_linters::DayOneNames;
use super::proper_noun_capitalization_linters::JetpackNames;
use super::proper_noun_capitalization_linters::PocketCastsNames;
use super::proper_noun_capitalization_linters::TumblrNames;
use super::proper_noun_capitalization_linters::{
AmazonNames, Americas, AppleNames, Australia, AzureNames, Canada, ChineseCommunistParty,
GoogleNames, Holidays, Koreas, Malaysia, MetaNames, MicrosoftNames, UnitedOrganizations,
@@ -72,6 +76,7 @@ use super::unclosed_quotes::UnclosedQuotes;
use super::use_genitive::UseGenitive;
use super::was_aloud::WasAloud;
use super::whereas::Whereas;
use super::wordpress_dotcom::WordPressDotcom;
use super::wrong_quotes::WrongQuotes;
use super::{CurrencyPlacement, Lint, Linter, NoOxfordComma, OxfordComma};
use crate::{Dictionary, Document};
@@ -195,6 +200,11 @@ macro_rules! create_lint_group_config {
}
create_lint_group_config!(
WordPressDotcom => true,
DayOneNames => true,
PocketCastsNames => true,
TumblrNames => true,
JetpackNames => true,
OutOfDate => true,
Desktop => true,
Laptop => true,
@@ -312,7 +322,7 @@ create_lint_group_config!(
ThatChallenged => true,
TurnItOff => true,
OfCourse => true,
AndAlike => true,
AndTheLike => true,
BadRap => true,
BatedBreath => true,
BeckAndCall => true,

View File

@@ -56,6 +56,7 @@ mod unclosed_quotes;
mod use_genitive;
mod was_aloud;
mod whereas;
mod wordpress_dotcom;
mod wrong_quotes;
pub use an_a::AnA;
@@ -109,7 +110,7 @@ pub use phrase_corrections::MutePoint;
pub use phrase_corrections::StateOfTheArt;
pub use phrase_corrections::WantBe;
pub use phrase_corrections::{
AndAlike, BadRap, BatedBreath, BeckAndCall, ChangeTack, EnMasse, HumanLife, HungerPang,
AndTheLike, BadRap, BatedBreath, BeckAndCall, ChangeTack, EnMasse, HumanLife, HungerPang,
LetAlone, LoAndBehold, NeedHelp, NoLonger, OfCourse, SneakingSuspicion, SpecialAttention,
SupposedTo, ThanOthers, ThatChallenged, TurnItOff,
};
@@ -117,6 +118,10 @@ pub use pique_interest::PiqueInterest;
pub use plural_conjugate::PluralConjugate;
pub use possessive_your::PossessiveYour;
pub use pronoun_contraction::PronounContraction;
pub use proper_noun_capitalization_linters::DayOneNames;
pub use proper_noun_capitalization_linters::JetpackNames;
pub use proper_noun_capitalization_linters::PocketCastsNames;
pub use proper_noun_capitalization_linters::TumblrNames;
pub use proper_noun_capitalization_linters::{
AmazonNames, Americas, AppleNames, Australia, AzureNames, Canada, ChineseCommunistParty,
GoogleNames, Holidays, Koreas, Malaysia, MetaNames, MicrosoftNames, UnitedOrganizations,
@@ -135,6 +140,7 @@ pub use unclosed_quotes::UnclosedQuotes;
pub use use_genitive::UseGenitive;
pub use was_aloud::WasAloud;
pub use whereas::Whereas;
pub use wordpress_dotcom::WordPressDotcom;
pub use wrong_quotes::WrongQuotes;
use crate::Document;

View File

@@ -78,7 +78,7 @@ create_linter_for_phrase!(ThatChallenged, "that challenged", 2);
create_linter_for_phrase!(NoLonger, "no longer", 1);
create_linter_for_phrase!(NeedHelp, "need help", 1);
create_linter_for_phrase!(OfCourse, "of course", 1);
create_linter_for_phrase!(AndAlike, "and alike", 1);
create_linter_for_phrase!(AndTheLike, "and the like", 1);
create_linter_for_phrase!(BadRap, "bad rap", 1);
create_linter_for_phrase!(BatedBreath, "bated breath", 1);
create_linter_for_phrase!(BeckAndCall, "beck and call", 1);

View File

@@ -39,7 +39,7 @@ impl PatternLinter for PossessiveYour {
Suggestion::replace_with_match_case("your".chars().collect(), orig_chars),
Suggestion::replace_with_match_case("you're an".chars().collect(), orig_chars),
],
message: "The possesive version of this word is more common in this context."
message: "The possessive version of this word is more common in this context."
.to_owned(),
..Default::default()
}

View File

@@ -684,6 +684,113 @@ create_linter_for!(
"When referring to Meta products and services, make sure to treat them as proper nouns."
);
// Declares the `JetpackNames` linter through `create_linter_for!`.
// The pattern is the term "Jetpack", then whitespace, then exactly one of the
// product names listed in the `EitherPattern` below (some are two terms long).
// NOTE(review): `t_aco` presumably matches the given term in any capitalization,
// letting the linter restore the canonical casing; confirm against `SequencePattern`.
create_linter_for!(
JetpackNames,
SequencePattern::default()
.t_aco("Jetpack")
.then_whitespace()
.then(Box::new(EitherPattern::new(vec![
Box::new(
SequencePattern::default()
.t_aco("VaultPress")
.then_whitespace()
.t_aco("Backup")
),
Box::new(SequencePattern::default().t_aco("VaultPress")),
Box::new(SequencePattern::default().t_aco("Scan")),
// NOTE(review): "Anti-spam" is passed as a single hyphenated term; confirm the
// tokenizer yields it as one token, otherwise this alternative may never match.
Box::new(
SequencePattern::default()
.t_aco("Akismet")
.then_whitespace()
.t_aco("Anti-spam")
),
Box::new(SequencePattern::default().t_aco("Stats")),
Box::new(SequencePattern::default().t_aco("Social")),
Box::new(SequencePattern::default().t_aco("Blaze")),
Box::new(
SequencePattern::default()
.t_aco("AI")
.then_whitespace()
.t_aco("Assistant")
),
Box::new(
SequencePattern::default()
.t_aco("Site")
.then_whitespace()
.t_aco("Search")
),
Box::new(SequencePattern::default().t_aco("Boost")),
Box::new(SequencePattern::default().t_aco("VideoPress")),
Box::new(
SequencePattern::default()
.t_aco("For")
.then_whitespace()
.t_aco("Agencies")
),
Box::new(SequencePattern::default().t_aco("CRM")),
]))),
"Ensure proper capitalization of Jetpack-related terms."
);
// Declares the `TumblrNames` linter through `create_linter_for!`.
// The pattern is the term "Tumblr", then whitespace, then one of the single-term
// product names listed in the `EitherPattern` below.
create_linter_for!(
TumblrNames,
SequencePattern::default()
.t_aco("Tumblr")
.then_whitespace()
.then(Box::new(EitherPattern::new(vec![
Box::new(SequencePattern::default().t_aco("Blaze")),
Box::new(SequencePattern::default().t_aco("Pro")),
Box::new(SequencePattern::default().t_aco("Live")),
Box::new(SequencePattern::default().t_aco("Ads")),
Box::new(SequencePattern::default().t_aco("Communities")),
Box::new(SequencePattern::default().t_aco("Shop")),
Box::new(SequencePattern::default().t_aco("Dashboard"))
]))),
"Ensure proper capitalization of Tumblr-related terms."
);
// Declares the `PocketCastsNames` linter through `create_linter_for!`.
// Both alternatives begin with "Pocket", whitespace, "Casts"; the second one
// additionally requires whitespace and a trailing "Plus".
// NOTE(review): the shorter alternative is listed first; confirm `EitherPattern`
// prefers the longest match so "Pocket Casts Plus" is covered as a whole.
create_linter_for!(
PocketCastsNames,
EitherPattern::new(vec![
Box::new(
SequencePattern::default()
.t_aco("Pocket")
.then_whitespace()
.t_aco("Casts")
),
Box::new(
SequencePattern::default()
.t_aco("Pocket")
.then_whitespace()
.t_aco("Casts")
.then_whitespace()
.t_aco("Plus")
)
]),
"Ensure proper capitalization of Pocket Casts and Pocket Casts Plus as brand names."
);
// Declares the `DayOneNames` linter through `create_linter_for!`.
// Both alternatives begin with "Day", whitespace, "One"; the second one
// additionally requires whitespace and a trailing "Premium".
// NOTE(review): the shorter alternative is listed first; confirm `EitherPattern`
// prefers the longest match so "Day One Premium" is covered as a whole.
create_linter_for!(
DayOneNames,
EitherPattern::new(vec![
Box::new(
SequencePattern::default()
.t_aco("Day")
.then_whitespace()
.t_aco("One")
),
Box::new(
SequencePattern::default()
.t_aco("Day")
.then_whitespace()
.t_aco("One")
.then_whitespace()
.t_aco("Premium")
)
]),
"Ensure proper capitalization of Day One and Day One Premium as brand names."
);
#[cfg(test)]
mod tests {
use crate::linting::tests::{assert_lint_count, assert_suggestion_result};

View File

@@ -5,7 +5,7 @@ use super::Suggestion;
use super::{Lint, LintKind, Linter};
use crate::document::Document;
use crate::spell::suggest_correct_spelling;
use crate::{CharString, Dictionary, TokenStringExt};
use crate::{CharString, CharStringExt, Dictionary, TokenStringExt};
pub struct SpellCheck<T>
where
@@ -56,7 +56,9 @@ impl<T: Dictionary> Linter for SpellCheck<T> {
for word in document.iter_words() {
let word_chars = document.get_span_content(word.span);
if self.dictionary.contains_word(word_chars) {
if self.dictionary.contains_exact_word(word_chars)
|| self.dictionary.contains_exact_word(&word_chars.to_lower())
{
continue;
}

View File

@@ -0,0 +1,57 @@
use crate::{CharString, CharStringExt, TokenStringExt};
use super::{Lint, LintKind, Linter, Suggestion};
/// A linter that flags hostnames which spell `WordPress.com` with the wrong
/// capitalization and suggests the canonical form.
#[derive(Default)]
pub struct WordPressDotcom;
impl Linter for WordPressDotcom {
    /// Emit one [`Lint`] for every hostname token whose text equals
    /// `WordPress.com` when compared case-insensitively but differs from it in
    /// capitalization. Each lint carries a single replacement suggestion with
    /// the canonical spelling.
    fn lint(&mut self, document: &crate::Document) -> Vec<Lint> {
        let canonical: CharString = "WordPress.com".chars().collect();
        let canonical_lower = canonical.to_lower();

        document
            .iter_hostnames()
            .filter(|hostname| {
                let text = document.get_span_content(hostname.span);
                // Already-correct hostnames are left alone; only case variants are flagged.
                canonical.as_slice() != text && text.to_lower() == canonical_lower
            })
            .map(|hostname| Lint {
                span: hostname.span,
                lint_kind: LintKind::Style,
                suggestions: vec![Suggestion::ReplaceWith(canonical.to_vec())],
                message: "The WordPress hosting provider should be stylized as `WordPress.com`"
                    .to_owned(),
                priority: 31,
            })
            .collect()
    }

    /// Human-readable summary of what this rule checks.
    fn description(&self) -> &str {
        "Ensures correct capitalization of WordPress.com. This rule verifies that the official stylization of WordPress.com is used when referring to the hosting provider."
    }
}
#[cfg(test)]
mod tests {
    use super::WordPressDotcom;
    use crate::linting::tests::assert_suggestion_result;

    /// A bare lowercase hostname is rewritten to the canonical capitalization.
    #[test]
    fn simple() {
        let input = "wordpress.com";
        let expected = "WordPress.com";
        assert_suggestion_result(input, WordPressDotcom, expected);
    }

    /// Only the hostname changes when it appears inside a sentence.
    #[test]
    fn sentence() {
        let input = "wordpress.com is a great hosting provider";
        let expected = "WordPress.com is a great hosting provider";
        assert_suggestion_result(input, WordPressDotcom, expected);
    }
}

View File

@@ -34,36 +34,25 @@ impl SplitCompoundWord {
word_b: Token,
source: &[char],
) -> Option<CharString> {
let mut a_chars: CharString = word_a.span.get_content(source).into();
let mut b_chars: CharString = word_b.span.get_content(source).into();
let a_chars: CharString = word_a.span.get_content(source).into();
let b_chars: CharString = word_b.span.get_content(source).into();
let mut buffer = CharString::new();
macro_rules! attempt {
() => {
buffer.clear();
buffer.extend_from_slice(&a_chars);
buffer.extend_from_slice(&b_chars);
if self.dict.contains_word(&buffer) {
let metadata = self.dict.get_word_metadata(&buffer);
if (self.predicate)(metadata) {
let correct = self.dict.get_correct_capitalization_of(&buffer).unwrap();
buffer.clear();
buffer.extend_from_slice(&a_chars);
buffer.extend_from_slice(&b_chars);
if self.dict.contains_word(&buffer) {
let metadata = self.dict.get_word_metadata(&buffer);
if (self.predicate)(metadata) {
return Some(buffer);
}
}
};
buffer.extend_from_slice(correct);
return Some(buffer);
}
}
attempt!();
a_chars[0] = a_chars[0].to_ascii_uppercase();
attempt!();
b_chars[0] = b_chars[0].to_ascii_uppercase();
attempt!();
a_chars[0] = a_chars[0].to_ascii_lowercase();
attempt!();
b_chars[0] = b_chars[0].to_ascii_lowercase();
attempt!();
None
}
}

View File

@@ -8,10 +8,14 @@ use crate::WordMetadata;
/// See also: [`super::FstDictionary`] and [`super::MutableDictionary`].
#[blanket(derive(Arc))]
pub trait Dictionary: Send + Sync {
/// Check if the dictionary contains a given word.
/// Check if the dictionary contains any capitalization of a given word.
fn contains_word(&self, word: &[char]) -> bool;
/// Check if the dictionary contains a given word.
/// Check if the dictionary contains any capitalization of a given word.
fn contains_word_str(&self, word: &str) -> bool;
/// Check if the dictionary contains the exact capitalization of a given word.
fn contains_exact_word(&self, word: &[char]) -> bool;
/// Check if the dictionary contains the exact capitalization of a given word.
fn contains_exact_word_str(&self, word: &str) -> bool;
/// Gets best fuzzy match from dictionary
fn fuzzy_match(
&self,
@@ -26,15 +30,15 @@ pub trait Dictionary: Send + Sync {
max_distance: u8,
max_results: usize,
) -> Vec<FuzzyMatchResult>;
/// Get the associated [`WordMetadata`] for a given word.
fn get_correct_capitalization_of(&self, word: &[char]) -> Option<&'_ [char]>;
/// Get the associated [`WordMetadata`] for any capitalization of a given word.
/// If the word isn't in the dictionary, the resulting metadata will be
/// empty.
fn get_word_metadata(&self, word: &[char]) -> WordMetadata;
/// Get the associated [`WordMetadata`] for a given word.
/// Get the associated [`WordMetadata`] for any capitalization of a given word.
/// If the word isn't in the dictionary, the resulting metadata will be
/// empty.
fn get_word_metadata_str(&self, word: &str) -> WordMetadata;
/// Iterate over the words in the dictionary.
fn words_iter(&self) -> Box<dyn Iterator<Item = &'_ [char]> + Send + '_>;

View File

@@ -212,6 +212,18 @@ impl Dictionary for FstDictionary {
/// Iterate over the words of length `len`; forwarded unchanged to the wrapped `full_dict`.
fn words_with_len_iter(&self, len: usize) -> Box<dyn Iterator<Item = &'_ [char]> + Send + '_> {
self.full_dict.words_with_len_iter(len)
}
/// Exact-capitalization membership check; forwarded unchanged to the wrapped `full_dict`.
fn contains_exact_word(&self, word: &[char]) -> bool {
self.full_dict.contains_exact_word(word)
}
/// `&str` flavour of the exact-capitalization check; forwarded unchanged to the wrapped `full_dict`.
fn contains_exact_word_str(&self, word: &str) -> bool {
self.full_dict.contains_exact_word_str(word)
}
/// Look up the dictionary's capitalization of `word`; forwarded unchanged to the wrapped `full_dict`.
/// Returns `None` when `full_dict` has no entry for it.
fn get_correct_capitalization_of(&self, word: &[char]) -> Option<&'_ [char]> {
self.full_dict.get_correct_capitalization_of(word)
}
}
#[cfg(test)]

View File

@@ -37,6 +37,15 @@ impl Default for MergedDictionary {
}
impl Dictionary for MergedDictionary {
/// Ask each child dictionary, in order, for its capitalization of `word` and
/// return the first answer. Yields `None` when no child knows the word.
fn get_correct_capitalization_of(&self, word: &[char]) -> Option<&'_ [char]> {
    self.children
        .iter()
        .find_map(|child| child.get_correct_capitalization_of(word))
}
fn contains_word(&self, word: &[char]) -> bool {
for child in &self.children {
if child.contains_word(word) {
@@ -46,6 +55,15 @@ impl Dictionary for MergedDictionary {
false
}
/// Report whether at least one child dictionary contains `word` with this
/// exact capitalization.
fn contains_exact_word(&self, word: &[char]) -> bool {
    self.children
        .iter()
        .any(|child| child.contains_exact_word(word))
}
fn get_word_metadata(&self, word: &[char]) -> WordMetadata {
let mut found_metadata = WordMetadata::default();
for child in &self.children {
@@ -72,6 +90,11 @@ impl Dictionary for MergedDictionary {
self.contains_word(&chars)
}
/// Report whether at least one child dictionary contains `word` with this
/// exact capitalization.
///
/// The string is collected into a [`CharString`] and handed to
/// `contains_exact_word`, so the `&str` and slice variants always agree.
/// Delegating to `contains_word` here would wrongly accept any capitalization.
fn contains_exact_word_str(&self, word: &str) -> bool {
    let chars: CharString = word.chars().collect();
    self.contains_exact_word(&chars)
}
fn get_word_metadata_str(&self, word: &str) -> WordMetadata {
let chars: CharString = word.chars().collect();
self.get_word_metadata(&chars)

View File

@@ -17,9 +17,9 @@ mod mutable_dictionary;
#[derive(PartialEq, Debug, Hash, Eq)]
pub struct FuzzyMatchResult<'a> {
word: &'a [char],
edit_distance: u8,
metadata: WordMetadata,
pub word: &'a [char],
pub edit_distance: u8,
pub metadata: WordMetadata,
}
impl PartialOrd for FuzzyMatchResult<'_> {

View File

@@ -14,7 +14,7 @@ use crate::{CharString, CharStringExt, WordMetadata};
use super::dictionary::Dictionary;
use super::FuzzyMatchResult;
/// A basic dictionary that allows words to be added.
/// A basic dictionary that allows words to be added after instantiating.
/// This is useful for user and file dictionaries that may change at runtime.
///
/// For immutable use-cases, such as the curated dictionary, prefer [`super::FstDictionary`],
@@ -37,6 +37,12 @@ pub struct MutableDictionary {
word_len_starts: Vec<usize>,
/// All English words
word_map: HashMap<CharString, WordMetadata>,
/// A map from the lowercase versions of a word to the correct capitalization
/// of that same word.
///
/// It can be used to check if a word is correctly capitalized, or if it is valid, regardless of
/// capitalization.
word_map_lowercase: HashMap<CharString, CharString>,
}
/// The uncached function that is used to produce the original copy of the
@@ -56,8 +62,14 @@ fn uncached_inner_new() -> Arc<MutableDictionary> {
words.dedup();
words.sort_unstable_by_key(|w| w.len());
let mut word_map_lowercase = HashMap::with_capacity(word_map.len());
for key in word_map.keys() {
word_map_lowercase.insert(key.to_lower(), key.clone());
}
Arc::new(MutableDictionary {
word_map,
word_map_lowercase,
word_len_starts: MutableDictionary::create_len_starts(&words),
words,
})
@@ -73,6 +85,7 @@ impl MutableDictionary {
words: Vec::new(),
word_len_starts: Vec::new(),
word_map: HashMap::new(),
word_map_lowercase: HashMap::new(),
}
}
@@ -98,6 +111,8 @@ impl MutableDictionary {
self.words.extend(pairs.iter().map(|(v, _)| v.clone()));
self.words.sort_by_key(|w| w.len());
self.word_len_starts = Self::create_len_starts(&self.words);
self.word_map_lowercase
.extend(pairs.iter().map(|(key, _)| (key.to_lower(), key.clone())));
self.word_map.extend(pairs);
}
@@ -145,12 +160,13 @@ impl Default for MutableDictionary {
impl Dictionary for MutableDictionary {
fn get_word_metadata(&self, word: &[char]) -> WordMetadata {
let normalized = seq_to_normalized(word);
let lowercase: CharString = normalized.to_lower();
let Some(correct_caps) = self.get_correct_capitalization_of(&normalized) else {
return WordMetadata::default();
};
self.word_map
.get(normalized.as_ref())
.get(correct_caps)
.cloned()
.or(self.word_map.get(lowercase.as_ref()).cloned())
.unwrap_or(WordMetadata::default())
}
@@ -158,7 +174,7 @@ impl Dictionary for MutableDictionary {
let normalized = seq_to_normalized(word);
let lowercase: CharString = normalized.to_lower();
self.word_map.contains_key(normalized.as_ref()) || self.word_map.contains_key(&lowercase)
self.word_map_lowercase.contains_key(&lowercase)
}
fn contains_word_str(&self, word: &str) -> bool {
@@ -171,6 +187,15 @@ impl Dictionary for MutableDictionary {
self.get_word_metadata(&chars)
}
/// Normalize and lowercase `word`, then look it up in `word_map_lowercase` to
/// recover the capitalization stored in the dictionary. Returns `None` when the
/// lowercase form has no entry.
fn get_correct_capitalization_of(&self, word: &[char]) -> Option<&'_ [char]> {
    let key: CharString = seq_to_normalized(word).to_lower();
    let canonical = self.word_map_lowercase.get(&key)?;
    Some(canonical.as_slice())
}
/// Suggest a correct spelling for a given misspelled word.
/// `Self::word` is assumed to be quite small (n < 100).
/// `max_distance` relates to an optimization that allows the search
@@ -258,6 +283,15 @@ impl Dictionary for MutableDictionary {
Box::new(self.words[start..end].iter().map(|v| v.as_slice()))
}
/// Check whether the normalized form of `word` is a key of `word_map`, with
/// no case folding applied.
fn contains_exact_word(&self, word: &[char]) -> bool {
    let normalized = seq_to_normalized(word);
    self.word_map.contains_key(normalized.as_ref())
}
/// `&str` convenience wrapper: collects the characters of `word` and defers to
/// `contains_exact_word`.
fn contains_exact_word_str(&self, word: &str) -> bool {
    let chars = word.chars().collect::<CharString>();
    self.contains_exact_word(&chars)
}
}
#[cfg(test)]

View File

@@ -35,6 +35,22 @@ pub fn make_title_case(toks: &[Token], source: &[char], dict: &impl Dictionary)
let mut output = toks.span().unwrap().get_content(source).to_vec();
while let Some((index, word)) = word_likes.next() {
if let Some(metadata) = word.kind.as_word() {
if metadata.is_proper_noun() {
// Replace it with the dictionary entry verbatim.
let orig_text = word.span.get_content(source);
if let Some(correct_caps) = dict.get_correct_capitalization_of(orig_text) {
// It should match the dictionary verbatim
output[word.span.start - start_index..word.span.end - start_index]
.iter_mut()
.enumerate()
.for_each(|(idx, c)| *c = correct_caps[idx]);
continue;
}
}
};
let should_capitalize = should_capitalize_token(&word, source, dict)
|| index == 0
|| word_likes.peek().is_none();
@@ -160,6 +176,14 @@ mod tests {
)
}
/// Title-casing the lowercase input "videopress" must yield "VideoPress".
#[test]
fn fixes_video_press() {
    let dictionary = FstDictionary::curated();
    let titled = make_title_case_str("videopress", &PlainEnglish, &dictionary);
    assert_eq!(titled, "VideoPress");
}
#[quickcheck]
fn a_stays_lowercase(prefix: String, postfix: String) -> TestResult {
// There must be words other than the `a`.

View File

@@ -65,6 +65,7 @@ pub trait TokenStringExt {
create_decl_for!(number);
create_decl_for!(at);
create_decl_for!(ellipsis);
create_decl_for!(hostname);
create_decl_for!(unlintable);
create_decl_for!(sentence_terminator);
create_decl_for!(paragraph_break);
@@ -99,6 +100,7 @@ pub trait TokenStringExt {
impl TokenStringExt for [Token] {
create_fns_for!(word);
create_fns_for!(word_like);
create_fns_for!(hostname);
create_fns_for!(conjunction);
create_fns_for!(space);
create_fns_for!(apostrophe);

View File

@@ -15,7 +15,7 @@ harper-typst = { path = "../harper-typst", version = "0.21.0" }
harper-html = { path = "../harper-html", version = "0.21.0" }
tower-lsp = "0.20.0"
tokio = { version = "1.43.0", features = ["fs", "rt", "rt-multi-thread", "macros", "io-std", "io-util", "net"] }
clap = { version = "4.5.28", features = ["derive"] }
clap = { version = "4.5.29", features = ["derive"] }
once_cell = "1.20.3"
dirs = "6.0.0"
anyhow = "1.0.95"

View File

@@ -235,7 +235,7 @@ addnoun noun:
fi
if [[ "{{noun}}" =~ ^[A-Z] ]]; then
echo "{{noun}}/M" >> $DICT_FILE
echo "{{noun}}/2M" >> $DICT_FILE
else
echo "{{noun}}/SM" >> $DICT_FILE
fi

View File

@@ -1,4 +0,0 @@
{
"useWebWorker": true,
"lintSettings": {}
}

File diff suppressed because it is too large Load Diff

View File

@@ -11,12 +11,12 @@
"start": "node build"
},
"devDependencies": {
"@sveltejs/adapter-node": "^3.0.3",
"@sveltejs/adapter-node": "^5.2.12",
"@sveltejs/kit": "^2.17.1",
"@sveltejs/vite-plugin-svelte": "^5.0.3",
"@types/reveal.js": "^5.0.3",
"autoprefixer": "^10.4.16",
"flowbite": "^3.0.0",
"flowbite": "^3.1.2",
"flowbite-svelte": "^0.44.18",
"postcss": "^8.4.31",
"svelte": "^5.15.0",
@@ -26,7 +26,7 @@
"typescript": "^5.7.3",
"vite": "^6.0.9",
"vite-plugin-pwa": "^0.21.1",
"vite-plugin-top-level-await": "^1.4.4",
"vite-plugin-top-level-await": "^1.5.0",
"vite-plugin-wasm": "^3.3.0"
},
"type": "module",

View File

@@ -2553,22 +2553,23 @@ __metadata:
languageName: node
linkType: hard
"@rollup/plugin-commonjs@npm:^25.0.7":
version: 25.0.8
resolution: "@rollup/plugin-commonjs@npm:25.0.8"
"@rollup/plugin-commonjs@npm:^28.0.1":
version: 28.0.2
resolution: "@rollup/plugin-commonjs@npm:28.0.2"
dependencies:
"@rollup/pluginutils": "npm:^5.0.1"
commondir: "npm:^1.0.1"
estree-walker: "npm:^2.0.2"
glob: "npm:^8.0.3"
fdir: "npm:^6.2.0"
is-reference: "npm:1.2.1"
magic-string: "npm:^0.30.3"
picomatch: "npm:^4.0.2"
peerDependencies:
rollup: ^2.68.0||^3.0.0||^4.0.0
peerDependenciesMeta:
rollup:
optional: true
checksum: 10c0/00d6fe41c33476dcb4b4ac3068f869b8537153646ea18f1fb9d0dfd5592792148567dd735d58ac15e2fdd4ed6c98453d20fe5343105f8cfa93d291198c9a90f5
checksum: 10c0/e90a443e63bfed567d5a4854960240d256818a0b3c69a45e95e196c40a755959406dabe4fbccb886eeb45d3445ddc8f966632563a7d590808be7eee8084384f1
languageName: node
linkType: hard
@@ -2604,6 +2605,24 @@ __metadata:
languageName: node
linkType: hard
"@rollup/plugin-node-resolve@npm:^16.0.0":
version: 16.0.0
resolution: "@rollup/plugin-node-resolve@npm:16.0.0"
dependencies:
"@rollup/pluginutils": "npm:^5.0.1"
"@types/resolve": "npm:1.20.2"
deepmerge: "npm:^4.2.2"
is-module: "npm:^1.0.0"
resolve: "npm:^1.22.1"
peerDependencies:
rollup: ^2.78.0||^3.0.0||^4.0.0
peerDependenciesMeta:
rollup:
optional: true
checksum: 10c0/b63deb6fc14b37070ccaffacc8c10c9720f28ce7632f4fe2ee77064c0c79bcc3fe060fb77160e673c9fd847307252f25a2983030bd54f1888324063c69ae1399
languageName: node
linkType: hard
"@rollup/plugin-replace@npm:^2.4.1":
version: 2.4.2
resolution: "@rollup/plugin-replace@npm:2.4.2"
@@ -3130,17 +3149,17 @@ __metadata:
languageName: node
linkType: hard
"@sveltejs/adapter-node@npm:^3.0.3":
version: 3.0.3
resolution: "@sveltejs/adapter-node@npm:3.0.3"
"@sveltejs/adapter-node@npm:^5.2.12":
version: 5.2.12
resolution: "@sveltejs/adapter-node@npm:5.2.12"
dependencies:
"@rollup/plugin-commonjs": "npm:^25.0.7"
"@rollup/plugin-commonjs": "npm:^28.0.1"
"@rollup/plugin-json": "npm:^6.1.0"
"@rollup/plugin-node-resolve": "npm:^15.2.3"
"@rollup/plugin-node-resolve": "npm:^16.0.0"
rollup: "npm:^4.9.5"
peerDependencies:
"@sveltejs/kit": ^2.0.0
checksum: 10c0/d52f198ebc690fde94195b6a593bc00d2c5dec1ccab8daa55b26c03c1420ee2fd66aa1affa819e8d0663d95755d56ba3c15979fdc5498c661d7851618c4d4b5a
"@sveltejs/kit": ^2.4.0
checksum: 10c0/74d69795ce21511dc29d8bb7b3ef0d473d444f27e9f281c2c8a4edf0242b1579092ee556cbd868b4ebf902a09326df92916edc7d11ccf286f041b284a5532090
languageName: node
linkType: hard
@@ -3312,90 +3331,90 @@ __metadata:
languageName: node
linkType: hard
"@swc/core-darwin-arm64@npm:1.10.12":
version: 1.10.12
resolution: "@swc/core-darwin-arm64@npm:1.10.12"
"@swc/core-darwin-arm64@npm:1.10.16":
version: 1.10.16
resolution: "@swc/core-darwin-arm64@npm:1.10.16"
conditions: os=darwin & cpu=arm64
languageName: node
linkType: hard
"@swc/core-darwin-x64@npm:1.10.12":
version: 1.10.12
resolution: "@swc/core-darwin-x64@npm:1.10.12"
"@swc/core-darwin-x64@npm:1.10.16":
version: 1.10.16
resolution: "@swc/core-darwin-x64@npm:1.10.16"
conditions: os=darwin & cpu=x64
languageName: node
linkType: hard
"@swc/core-linux-arm-gnueabihf@npm:1.10.12":
version: 1.10.12
resolution: "@swc/core-linux-arm-gnueabihf@npm:1.10.12"
"@swc/core-linux-arm-gnueabihf@npm:1.10.16":
version: 1.10.16
resolution: "@swc/core-linux-arm-gnueabihf@npm:1.10.16"
conditions: os=linux & cpu=arm
languageName: node
linkType: hard
"@swc/core-linux-arm64-gnu@npm:1.10.12":
version: 1.10.12
resolution: "@swc/core-linux-arm64-gnu@npm:1.10.12"
"@swc/core-linux-arm64-gnu@npm:1.10.16":
version: 1.10.16
resolution: "@swc/core-linux-arm64-gnu@npm:1.10.16"
conditions: os=linux & cpu=arm64 & libc=glibc
languageName: node
linkType: hard
"@swc/core-linux-arm64-musl@npm:1.10.12":
version: 1.10.12
resolution: "@swc/core-linux-arm64-musl@npm:1.10.12"
"@swc/core-linux-arm64-musl@npm:1.10.16":
version: 1.10.16
resolution: "@swc/core-linux-arm64-musl@npm:1.10.16"
conditions: os=linux & cpu=arm64 & libc=musl
languageName: node
linkType: hard
"@swc/core-linux-x64-gnu@npm:1.10.12":
version: 1.10.12
resolution: "@swc/core-linux-x64-gnu@npm:1.10.12"
"@swc/core-linux-x64-gnu@npm:1.10.16":
version: 1.10.16
resolution: "@swc/core-linux-x64-gnu@npm:1.10.16"
conditions: os=linux & cpu=x64 & libc=glibc
languageName: node
linkType: hard
"@swc/core-linux-x64-musl@npm:1.10.12":
version: 1.10.12
resolution: "@swc/core-linux-x64-musl@npm:1.10.12"
"@swc/core-linux-x64-musl@npm:1.10.16":
version: 1.10.16
resolution: "@swc/core-linux-x64-musl@npm:1.10.16"
conditions: os=linux & cpu=x64 & libc=musl
languageName: node
linkType: hard
"@swc/core-win32-arm64-msvc@npm:1.10.12":
version: 1.10.12
resolution: "@swc/core-win32-arm64-msvc@npm:1.10.12"
"@swc/core-win32-arm64-msvc@npm:1.10.16":
version: 1.10.16
resolution: "@swc/core-win32-arm64-msvc@npm:1.10.16"
conditions: os=win32 & cpu=arm64
languageName: node
linkType: hard
"@swc/core-win32-ia32-msvc@npm:1.10.12":
version: 1.10.12
resolution: "@swc/core-win32-ia32-msvc@npm:1.10.12"
"@swc/core-win32-ia32-msvc@npm:1.10.16":
version: 1.10.16
resolution: "@swc/core-win32-ia32-msvc@npm:1.10.16"
conditions: os=win32 & cpu=ia32
languageName: node
linkType: hard
"@swc/core-win32-x64-msvc@npm:1.10.12":
version: 1.10.12
resolution: "@swc/core-win32-x64-msvc@npm:1.10.12"
"@swc/core-win32-x64-msvc@npm:1.10.16":
version: 1.10.16
resolution: "@swc/core-win32-x64-msvc@npm:1.10.16"
conditions: os=win32 & cpu=x64
languageName: node
linkType: hard
"@swc/core@npm:^1.7.0":
version: 1.10.12
resolution: "@swc/core@npm:1.10.12"
"@swc/core@npm:^1.10.16":
version: 1.10.16
resolution: "@swc/core@npm:1.10.16"
dependencies:
"@swc/core-darwin-arm64": "npm:1.10.12"
"@swc/core-darwin-x64": "npm:1.10.12"
"@swc/core-linux-arm-gnueabihf": "npm:1.10.12"
"@swc/core-linux-arm64-gnu": "npm:1.10.12"
"@swc/core-linux-arm64-musl": "npm:1.10.12"
"@swc/core-linux-x64-gnu": "npm:1.10.12"
"@swc/core-linux-x64-musl": "npm:1.10.12"
"@swc/core-win32-arm64-msvc": "npm:1.10.12"
"@swc/core-win32-ia32-msvc": "npm:1.10.12"
"@swc/core-win32-x64-msvc": "npm:1.10.12"
"@swc/core-darwin-arm64": "npm:1.10.16"
"@swc/core-darwin-x64": "npm:1.10.16"
"@swc/core-linux-arm-gnueabihf": "npm:1.10.16"
"@swc/core-linux-arm64-gnu": "npm:1.10.16"
"@swc/core-linux-arm64-musl": "npm:1.10.16"
"@swc/core-linux-x64-gnu": "npm:1.10.16"
"@swc/core-linux-x64-musl": "npm:1.10.16"
"@swc/core-win32-arm64-msvc": "npm:1.10.16"
"@swc/core-win32-ia32-msvc": "npm:1.10.16"
"@swc/core-win32-x64-msvc": "npm:1.10.16"
"@swc/counter": "npm:^0.1.3"
"@swc/types": "npm:^0.1.17"
peerDependencies:
@@ -3424,7 +3443,7 @@ __metadata:
peerDependenciesMeta:
"@swc/helpers":
optional: true
checksum: 10c0/ce46f64bd66d21dd1fea3afa7f82dcc28520ccac13f2b6c580d37b58b97a3b97281300bed24a20294d3dd4eeb2e50fb5a3e5d15a278aa80a9474e362c83fa5ff
checksum: 10c0/155f4a2db6e1342084fa205883a9628d34ee2c14e511125630f5531fead732de2c4569d40545cd7b8976072cfed0e5ddf5cd72067abd24ce81e237dd90d8ee69
languageName: node
linkType: hard
@@ -6887,15 +6906,15 @@ __metadata:
languageName: node
linkType: hard
"flowbite@npm:^3.0.0":
version: 3.1.1
resolution: "flowbite@npm:3.1.1"
"flowbite@npm:^3.1.2":
version: 3.1.2
resolution: "flowbite@npm:3.1.2"
dependencies:
"@popperjs/core": "npm:^2.9.3"
flowbite-datepicker: "npm:^1.3.1"
mini-svg-data-uri: "npm:^1.4.3"
postcss: "npm:^8.5.1"
checksum: 10c0/4b38a7f696df9f8a59eb40f023b23b5c0bb9eb23268f06bd9929cb9d196ce2f935930949b2b891e0ba1c34b3be2783bc05fcade4394d827adbbdf606ee5b3340
checksum: 10c0/5e617695162e8800f874281fe9ac205cbadc1c1413eccc760efb19c51bc6cfb337e3af70bfda6ed49b586b24e6af01a99b3a87c633688a77924516bc4d08795b
languageName: node
linkType: hard
@@ -7195,19 +7214,6 @@ __metadata:
languageName: node
linkType: hard
"glob@npm:^8.0.3":
version: 8.1.0
resolution: "glob@npm:8.1.0"
dependencies:
fs.realpath: "npm:^1.0.0"
inflight: "npm:^1.0.4"
inherits: "npm:2"
minimatch: "npm:^5.0.1"
once: "npm:^1.3.0"
checksum: 10c0/cb0b5cab17a59c57299376abe5646c7070f8acb89df5595b492dba3bfb43d301a46c01e5695f01154e6553168207cb60d4eaf07d3be4bc3eb9b0457c5c561d0f
languageName: node
linkType: hard
"globals@npm:^11.1.0":
version: 11.12.0
resolution: "globals@npm:11.12.0"
@@ -7296,14 +7302,14 @@ __metadata:
version: 0.0.0-use.local
resolution: "harper-web@workspace:web"
dependencies:
"@sveltejs/adapter-node": "npm:^3.0.3"
"@sveltejs/adapter-node": "npm:^5.2.12"
"@sveltejs/kit": "npm:^2.17.1"
"@sveltejs/vite-plugin-svelte": "npm:^5.0.3"
"@sveltepress/theme-default": "npm:^5.0.7"
"@sveltepress/vite": "npm:^1.1.2"
"@types/reveal.js": "npm:^5.0.3"
autoprefixer: "npm:^10.4.16"
flowbite: "npm:^3.0.0"
flowbite: "npm:^3.1.2"
flowbite-svelte: "npm:^0.44.18"
harper.js: "link:../harper.js"
lodash-es: "npm:^4.17.21"
@@ -7317,7 +7323,7 @@ __metadata:
typescript: "npm:^5.7.3"
vite: "npm:^6.0.9"
vite-plugin-pwa: "npm:^0.21.1"
vite-plugin-top-level-await: "npm:^1.4.4"
vite-plugin-top-level-await: "npm:^1.5.0"
vite-plugin-wasm: "npm:^3.3.0"
languageName: unknown
linkType: soft
@@ -12908,16 +12914,16 @@ __metadata:
languageName: node
linkType: hard
"vite-plugin-top-level-await@npm:^1.4.4":
version: 1.4.4
resolution: "vite-plugin-top-level-await@npm:1.4.4"
"vite-plugin-top-level-await@npm:^1.5.0":
version: 1.5.0
resolution: "vite-plugin-top-level-await@npm:1.5.0"
dependencies:
"@rollup/plugin-virtual": "npm:^3.0.2"
"@swc/core": "npm:^1.7.0"
"@swc/core": "npm:^1.10.16"
uuid: "npm:^10.0.0"
peerDependencies:
vite: ">=2.8"
checksum: 10c0/013e7b2e28632d93d04c4061187198e699064fc208a1657c100354b32da30921fa835879fc17779d5e0b074855237408da2fadd720fa0f4571137427a1efd5e3
checksum: 10c0/e582091e9cb020415d243937073a137bf30b6a2fd9af85fa7f96610d113ecab7bdb1aed2c4f35df22c54aaf4209be4d607bcb7de532e04e4e2619f9e22e395f6
languageName: node
linkType: hard