feat(core): create rule to title-case headings (#2297)

* feat(core): introduce `Heading` `TokenKind`

* feat(core): create `iter_headings` extension

* refactor(core): remove unhelpful tests

* chore(core): update snapshots

* refactor(core): avoid allocation where possible

* feat(core): create rule that enforces title case for headings

* fix(core): NP members should be capitalized

* fix(core): appease clippy

* feat(harper.js): allow `harper.js` to force headings

* fix(harper.js): make `organized_lints` similar: allow forced headings

* fix(harper.js): appease Biome

* feat(chrome-ext): expose title casing in Chrome extension

* fix(chrome-ext): content script console bloat

* test(core): title_case module more extensively; fix things

* refactor(core): remove useless branch

* refactor(core): use tokens over characters

* refactor(core): use token indices instead of spans
This commit is contained in:
Elijah Potter
2025-12-05 13:35:00 -07:00
committed by GitHub
parent 1301d20400
commit bfaa324c07
43 changed files with 1181 additions and 344 deletions

View File

@@ -43,9 +43,9 @@ pub struct DictWordMetadata {
pub common: bool,
#[serde(default = "default_none")]
pub derived_from: Option<WordId>,
/// Generated by a chunker
/// Generated by a chunker. Declares whether the word is a member of a nominal phrase.
pub np_member: Option<bool>,
/// Generated by a POS tagger
/// Generated by a POS tagger. Declares what it inferred the word's part of speech to be.
pub pos_tag: Option<UPOS>,
}

View File

@@ -918,6 +918,7 @@ impl TokenStringExt for Document {
create_fns_on_doc!(verb);
create_fns_on_doc!(word);
create_fns_on_doc!(word_like);
create_fns_on_doc!(heading_start);
fn first_sentence_word(&self) -> Option<&Token> {
self.tokens.first_sentence_word()
@@ -947,6 +948,10 @@ impl TokenStringExt for Document {
self.tokens.iter_paragraphs()
}
/// Iterates over the headings of this document.
///
/// This simply forwards to the implementation for the document's token slice.
fn iter_headings(&self) -> impl Iterator<Item = &'_ [Token]> + '_ {
    let tokens: &[Token] = &self.tokens;
    tokens.iter_headings()
}
fn iter_sentences(&self) -> impl Iterator<Item = &'_ [Token]> + '_ {
self.tokens.iter_sentences()
}

View File

@@ -110,7 +110,7 @@ mod tests {
#[test]
fn multiple_dollar() {
assert_suggestion_result(
"They were either 25$ 24$ or 23$.",
"They were either 25\\$ 24\\$ or 23\\$.",
CurrencyPlacement::default(),
"They were either $25 $24 or $23.",
);

View File

@@ -185,6 +185,7 @@ use super::touristic::Touristic;
use super::unclosed_quotes::UnclosedQuotes;
use super::update_place_names::UpdatePlaceNames;
use super::use_genitive::UseGenitive;
use super::use_title_case::UseTitleCase;
use super::verb_to_adjective::VerbToAdjective;
use super::very_unique::VeryUnique;
use super::vice_versa::ViceVersa;
@@ -691,6 +692,9 @@ impl LintGroup {
out.add("MassPlurals", MassPlurals::new(dictionary.clone()));
out.config.set_rule_enabled("MassPlurals", true);
out.add("UseTitleCase", UseTitleCase::new(dictionary.clone()));
out.config.set_rule_enabled("UseTitleCase", true);
out.add_chunk_expr_linter(
"DisjointPrefixes",
DisjointPrefixes::new(dictionary.clone()),

View File

@@ -198,6 +198,7 @@ mod touristic;
mod unclosed_quotes;
mod update_place_names;
mod use_genitive;
mod use_title_case;
mod verb_to_adjective;
mod very_unique;
mod vice_versa;
@@ -254,7 +255,8 @@ where
#[cfg(test)]
pub mod tests {
use crate::{Document, Span, Token, parsers::PlainEnglish};
use crate::parsers::Markdown;
use crate::{Document, Span, Token};
use hashbrown::HashSet;
/// Extension trait for converting spans of tokens back to their original text
@@ -479,7 +481,7 @@ pub mod tests {
loop {
let test = Document::new_from_vec(
text_chars.clone().into(),
&PlainEnglish,
&Markdown::default(),
&FstDictionary::curated(),
);
let lints = linter.lint(&test);

View File

@@ -0,0 +1,69 @@
use crate::{Document, TokenStringExt, spell::Dictionary, title_case::try_make_title_case};
use super::{Lint, LintKind, Linter, Suggestion};
/// A linter that suggests a title-cased replacement for every heading that
/// [`try_make_title_case`] would change.
pub struct UseTitleCase<D: Dictionary + 'static> {
    /// Dictionary handed to [`try_make_title_case`] when checking a heading.
    dict: D,
}

impl<D: Dictionary + 'static> UseTitleCase<D> {
    /// Creates the rule, using `dict` for title-casing decisions.
    pub fn new(dict: D) -> Self {
        Self { dict }
    }
}

impl<D: Dictionary + 'static> Linter for UseTitleCase<D> {
    /// Emits one [`Lint`] per heading whose title-cased form differs from its
    /// current text. The lint covers the whole heading and carries the
    /// title-cased text as its only suggestion.
    fn lint(&mut self, document: &Document) -> Vec<Lint> {
        let source = document.get_source();

        document
            .iter_headings()
            .filter_map(|heading| {
                // Headings without a span cannot be reported on.
                let span = heading.span()?;
                // `None` here means the heading needs no edits.
                let title_case = try_make_title_case(heading, source, &self.dict)?;

                Some(Lint {
                    span,
                    lint_kind: LintKind::Capitalization,
                    suggestions: vec![Suggestion::ReplaceWith(title_case)],
                    message: "Try to use title case in headings.".to_owned(),
                    priority: 127,
                })
            })
            .collect()
    }

    fn description(&self) -> &str {
        "Prompts you to use title case in relevant headings."
    }
}
#[cfg(test)]
mod tests {
    use super::UseTitleCase;
    use crate::linting::tests::assert_suggestion_result;
    use crate::spell::FstDictionary;

    /// Applies the rule (backed by the curated dictionary) to `input` and
    /// checks that accepting its suggestions produces `expected`.
    fn assert_title_cased(input: &str, expected: &str) {
        assert_suggestion_result(
            input,
            UseTitleCase::new(FstDictionary::curated()),
            expected,
        );
    }

    #[test]
    fn simple_correction() {
        assert_title_cased("# This is a title", "# This Is a Title");
    }

    #[test]
    fn double_correction() {
        assert_title_cased(
            "# This is a title\n\n## This is a subtitle",
            "# This Is a Title\n\n## This Is a Subtitle",
        );
    }
}

View File

@@ -204,7 +204,16 @@ impl Parser for Markdown {
});
stack.push(pulldown_cmark::Tag::List(v));
}
pulldown_cmark::Event::Start(tag) => stack.push(tag),
pulldown_cmark::Event::Start(tag) => {
if matches!(tag, pulldown_cmark::Tag::Heading { .. }) {
tokens.push(Token {
span: Span::new_with_len(span_start, 0),
kind: TokenKind::HeadingStart,
});
}
stack.push(tag)
}
pulldown_cmark::Event::End(pulldown_cmark::TagEnd::Paragraph)
| pulldown_cmark::Event::End(pulldown_cmark::TagEnd::Item)
| pulldown_cmark::Event::End(pulldown_cmark::TagEnd::Heading(_))
@@ -575,4 +584,28 @@ Paragraph.
let parser = Markdown::new(opts);
let _res = parser.parse_str("//{@j");
}
/// A document consisting of a single heading should produce exactly one
/// heading-start token and exactly one heading slice.
#[test]
fn simple_headings_are_marked() {
    let tokens =
        Markdown::new(MarkdownOptions::default()).parse_str("# This is a simple heading");

    assert_eq!(tokens.iter_heading_starts().count(), 1);
    assert_eq!(tokens.iter_headings().count(), 1);
}
/// A document with two headings of different levels should produce two
/// heading-start tokens and two heading slices.
#[test]
fn multiple_headings_are_marked() {
let opts = MarkdownOptions::default();
let parser = Markdown::new(opts);
let tokens = parser.parse_str(
r#"# This is a simple heading
## This is a second simple heading"#,
);
assert_eq!(tokens.iter_heading_starts().count(), 2);
assert_eq!(tokens.iter_headings().count(), 2);
}
}

View File

@@ -4,6 +4,7 @@ mod collapse_identifiers;
mod isolate_english;
mod markdown;
mod mask;
mod oops_all_headings;
mod org_mode;
mod plain_english;
@@ -12,6 +13,7 @@ pub use collapse_identifiers::CollapseIdentifiers;
pub use isolate_english::IsolateEnglish;
pub use markdown::{Markdown, MarkdownOptions};
pub use mask::Mask;
pub use oops_all_headings::OopsAllHeadings;
pub use org_mode::OrgMode;
pub use plain_english::PlainEnglish;

View File

@@ -0,0 +1,49 @@
use crate::{Span, Token, TokenKind};
use super::Parser;
/// A parser that wraps another, forcing the entirety of the document to be composed of headings.
///
/// A [`TokenKind::HeadingStart`] token is placed at the beginning of the token stream and after
/// every paragraph break that is followed by another token. Wherever the wrapped parser already
/// emitted a heading start at such a position, no additional one is inserted.
pub struct OopsAllHeadings<P: Parser + 'static> {
    inner: P,
}

impl<P: Parser + 'static> OopsAllHeadings<P> {
    /// Wraps `inner`; its output will be re-marked as headings when parsing.
    pub fn new(inner: P) -> Self {
        Self { inner }
    }
}

impl<P: Parser + 'static> Parser for OopsAllHeadings<P> {
    /// Parses `source` with the wrapped parser and inserts zero-length
    /// [`TokenKind::HeadingStart`] tokens so that every paragraph is treated as a heading.
    fn parse(&self, source: &[char]) -> Vec<Token> {
        let inner = self.inner.parse(source);

        // At least one slot per inner token, plus the leading heading start.
        let mut output = Vec::with_capacity(inner.len() + 1);
        let mut iter = inner.into_iter().peekable();

        // Open the first heading ourselves, unless the wrapped parser already did. Pushing
        // unconditionally would leave two heading starts at the same position, and the same
        // heading would then be reported twice.
        if iter.peek().is_none_or(|t| !t.kind.is_heading_start()) {
            output.push(Token {
                span: Span::default(),
                kind: TokenKind::HeadingStart,
            });
        }

        while let Some(tok) = iter.next() {
            // A paragraph break closes the current heading. Open a new one right after it,
            // but only if more tokens follow and the next one is not already a heading start.
            let heading_start = if tok.kind.is_paragraph_break()
                && iter.peek().is_some_and(|t| !t.kind.is_heading_start())
            {
                Some(Token {
                    span: Span::new_with_len(tok.span.end, 0),
                    kind: TokenKind::HeadingStart,
                })
            } else {
                None
            };

            output.push(tok);

            if let Some(extra) = heading_start {
                output.push(extra);
            }
        }

        output
    }
}

View File

@@ -6,6 +6,7 @@ use crate::TokenKind;
use hashbrown::HashSet;
use lazy_static::lazy_static;
use crate::Punctuation;
use crate::spell::Dictionary;
use crate::{CharStringExt, Document, TokenStringExt, parsers::Parser};
@@ -27,17 +28,46 @@ pub fn make_title_case_chars(
make_title_case(document.get_tokens(), source.as_slice(), dict)
}
pub fn make_title_case(toks: &[Token], source: &[char], dict: &impl Dictionary) -> Vec<char> {
pub fn try_make_title_case(
toks: &[Token],
source: &[char],
dict: &impl Dictionary,
) -> Option<Vec<char>> {
if toks.is_empty() {
return Vec::new();
return None;
}
let start_index = toks.first().unwrap().span.start;
let relevant_text = toks.span().unwrap().get_content(source);
let mut word_likes = toks.iter_word_likes().enumerate().peekable();
let mut output = toks.span().unwrap().get_content(source).to_vec();
let mut word_likes = toks.iter_word_like_indices().enumerate().peekable();
let mut output = None;
let mut previous_word_index = 0;
// Checks whether the provided char differs from the output so far or from the source. If so,
// it sets the char in the output, allocating the output first if that has not happened yet.
// The goal here is to avoid allocating if no edits must be made.
let mut set_output_char = |idx: usize, new_char: char| {
if output
.as_ref()
.is_some_and(|o: &Vec<char>| o[idx] != new_char)
|| relevant_text[idx] != new_char
{
if output.is_none() {
output = Some(relevant_text.to_vec())
}
let Some(mutable) = &mut output else {
panic!("We just set output to `Some`. This should be impossible.");
};
mutable[idx] = new_char;
}
};
while let Some((index, word_idx)) = word_likes.next() {
let word = &toks[word_idx];
while let Some((index, word)) = word_likes.next() {
if let Some(Some(metadata)) = word.kind.as_word()
&& metadata.is_proper_noun()
{
@@ -46,59 +76,87 @@ pub fn make_title_case(toks: &[Token], source: &[char], dict: &impl Dictionary)
if let Some(correct_caps) = dict.get_correct_capitalization_of(orig_text) {
// It should match the dictionary verbatim
output[word.span.start - start_index..word.span.end - start_index]
.iter_mut()
.enumerate()
.for_each(|(idx, c)| *c = correct_caps[idx]);
for (i, c) in correct_caps.iter().enumerate() {
set_output_char(word.span.start - start_index + i, *c);
}
}
};
let should_capitalize = should_capitalize_token(word, source, dict)
// Capitalize the first word following a colon to match Chicago style.
let is_after_colon = toks[previous_word_index..word_idx]
.iter()
.any(|tok| matches!(tok.kind, TokenKind::Punctuation(Punctuation::Colon)));
let should_capitalize = is_after_colon
|| should_capitalize_token(word, source)
|| index == 0
|| word_likes.peek().is_none();
if should_capitalize {
output[word.span.start - start_index] =
output[word.span.start - start_index].to_ascii_uppercase();
set_output_char(
word.span.start - start_index,
relevant_text[word.span.start - start_index].to_ascii_uppercase(),
);
} else {
// The whole word should be lowercase.
for i in word.span {
output[i - start_index] = output[i - start_index].to_ascii_lowercase();
set_output_char(
i - start_index,
relevant_text[i - start_index].to_ascii_lowercase(),
);
}
}
previous_word_index = word_idx
}
if let Some(output) = &output
&& output.as_slice() == relevant_text
{
return None;
}
output
}
/// Title-cases the text covered by `toks`, always returning an owned buffer.
///
/// When [`try_make_title_case`] reports that nothing needs to change, the original text covered
/// by the tokens is returned as-is (or the content of a default span if the tokens have no span).
pub fn make_title_case(toks: &[Token], source: &[char], dict: &impl Dictionary) -> Vec<char> {
    if let Some(title_cased) = try_make_title_case(toks, source, dict) {
        return title_cased;
    }

    // No edits were necessary: hand back a copy of the untouched text.
    let untouched = toks.span().unwrap_or_default();
    untouched.get_content(source).to_vec()
}
/// Determines whether a token should be capitalized.
/// Is not responsible for capitalization requirements that are dependent on token position.
fn should_capitalize_token(tok: &Token, source: &[char], dict: &impl Dictionary) -> bool {
fn should_capitalize_token(tok: &Token, source: &[char]) -> bool {
match &tok.kind {
TokenKind::Word(Some(metadata)) => {
// Only specific conjunctions are not capitalized.
lazy_static! {
static ref SPECIAL_CONJUNCTIONS: HashSet<Vec<char>> =
["and", "but", "for", "or", "nor"]
["and", "but", "for", "or", "nor", "as"]
.iter()
.map(|v| v.chars().collect())
.collect();
static ref SPECIAL_ARTICLES: HashSet<Vec<char>> = ["a", "an", "the"]
.iter()
.map(|v| v.chars().collect())
.collect();
}
let chars = tok.span.get_content(source);
let chars_lower = chars.to_lower();
let mut metadata = Cow::Borrowed(metadata);
if let Some(metadata_lower) = dict.get_word_metadata(&chars_lower) {
metadata = Cow::Owned(metadata.clone().or(&metadata_lower));
}
let metadata = Cow::Borrowed(metadata);
let is_short_preposition = metadata.preposition && tok.span.len() <= 4;
if chars_lower.as_ref() == ['a', 'l', 'l'] {
return true;
}
!is_short_preposition
&& !metadata.is_determiner()
&& !metadata.is_non_possessive_determiner()
&& !SPECIAL_CONJUNCTIONS.contains(chars_lower.as_ref())
&& !SPECIAL_ARTICLES.contains(chars_lower.as_ref())
}
_ => true,
}
@@ -267,4 +325,184 @@ mod tests {
"I Spoke at WordCamp U.S. in 2025",
);
}
// "your" is expected to be capitalized, as are "is" and "not".
#[test]
fn fixes_your_correctly() {
assert_eq!(
make_title_case_str(
"it is not your friend",
&PlainEnglish,
&FstDictionary::curated()
),
"It Is Not Your Friend",
);
}
// "and" and a non-leading "the" stay lowercase; the leading "the" is capitalized.
#[test]
fn handles_old_man_and_the_sea() {
assert_eq!(
make_title_case_str(
"the old man and the sea",
&PlainEnglish,
&FstDictionary::curated()
),
"The Old Man and the Sea",
);
}
// The first word after a colon is capitalized, even though it is an article.
#[test]
fn handles_great_story_with_subtitle() {
assert_eq!(
make_title_case_str(
"the great story: a tale of two cities",
&PlainEnglish,
&FstDictionary::curated()
),
"The Great Story: A Tale of Two Cities",
);
}
// Text containing a semicolon.
#[test]
fn handles_lantern_and_moths() {
assert_eq!(
make_title_case_str(
"lantern flickered; moths began their worship",
&PlainEnglish,
&FstDictionary::curated()
),
"Lantern Flickered; Moths Began Their Worship",
);
}
// "with" is expected to stay lowercase.
#[test]
fn handles_static_with_ghosts() {
assert_eq!(
make_title_case_str(
"static filled the room with ghosts",
&PlainEnglish,
&FstDictionary::curated()
),
"Static Filled the Room with Ghosts",
);
}
// "before" is expected to be capitalized; the trailing period is preserved.
#[test]
fn handles_glass_trembled_before_thunder() {
assert_eq!(
make_title_case_str(
"glass trembled before thunder arrived.",
&PlainEnglish,
&FstDictionary::curated()
),
"Glass Trembled Before Thunder Arrived.",
);
}
// "all" and the single-letter "b" are capitalized; "to" and "for" are not.
#[test]
fn handles_hepatitis_b_shots() {
assert_eq!(
make_title_case_str(
"an end to hepatitis b shots for all newborns",
&PlainEnglish,
&FstDictionary::curated()
),
"An End to Hepatitis B Shots for All Newborns",
);
}
// "as", "of" and "the" stay lowercase; the possessive "trump's" is capitalized.
#[test]
fn handles_trump_approval_rating() {
assert_eq!(
make_title_case_str(
"trump's approval rating dips as views of his handling of the economy sour",
&PlainEnglish,
&FstDictionary::curated()
),
"Trump's Approval Rating Dips as Views of His Handling of the Economy Sour",
);
}
// The remaining tests cover short two- and three-word titles.
#[test]
fn handles_last_door() {
assert_eq!(
make_title_case_str("the last door", &PlainEnglish, &FstDictionary::curated()),
"The Last Door",
);
}
#[test]
fn handles_midnight_river() {
assert_eq!(
make_title_case_str("midnight river", &PlainEnglish, &FstDictionary::curated()),
"Midnight River",
);
}
// A leading article is capitalized because it is the first word.
#[test]
fn handles_a_quiet_room() {
assert_eq!(
make_title_case_str("a quiet room", &PlainEnglish, &FstDictionary::curated()),
"A Quiet Room",
);
}
#[test]
fn handles_broken_map() {
assert_eq!(
make_title_case_str("broken map", &PlainEnglish, &FstDictionary::curated()),
"Broken Map",
);
}
// "in" is expected to stay lowercase in the middle of a title.
#[test]
fn handles_fire_in_autumn() {
assert_eq!(
make_title_case_str("fire in autumn", &PlainEnglish, &FstDictionary::curated()),
"Fire in Autumn",
);
}
#[test]
fn handles_hidden_path() {
assert_eq!(
make_title_case_str("the hidden path", &PlainEnglish, &FstDictionary::curated()),
"The Hidden Path",
);
}
// "under" is capitalized here; it is also the first word of the title.
#[test]
fn handles_under_blue_skies() {
assert_eq!(
make_title_case_str("under blue skies", &PlainEnglish, &FstDictionary::curated()),
"Under Blue Skies",
);
}
// "and" is expected to stay lowercase between two capitalized words.
#[test]
fn handles_lost_and_found() {
assert_eq!(
make_title_case_str("lost and found", &PlainEnglish, &FstDictionary::curated()),
"Lost and Found",
);
}
#[test]
fn handles_silent_watcher() {
assert_eq!(
make_title_case_str(
"the silent watcher",
&PlainEnglish,
&FstDictionary::curated()
),
"The Silent Watcher",
);
}
#[test]
fn handles_winter_road() {
assert_eq!(
make_title_case_str("winter road", &PlainEnglish, &FstDictionary::curated()),
"Winter Road",
);
}
}

View File

@@ -50,6 +50,7 @@ pub enum TokenKind {
Unlintable,
ParagraphBreak,
Regexish,
HeadingStart,
}
impl TokenKind {

View File

@@ -88,6 +88,7 @@ pub trait TokenStringExt: private::Sealed {
create_decl_for!(verb);
create_decl_for!(word);
create_decl_for!(word_like);
create_decl_for!(heading_start);
fn iter_linking_verb_indices(&self) -> impl Iterator<Item = usize> + '_;
fn iter_linking_verbs(&self) -> impl Iterator<Item = &Token> + '_;
@@ -106,6 +107,12 @@ pub trait TokenStringExt: private::Sealed {
/// paragraphs in a document.
fn iter_paragraphs(&self) -> impl Iterator<Item = &'_ [Token]> + '_;
/// Get an iterator over token slices that represent headings.
///
/// A heading begins with a [`TokenKind::HeadingStart`] token and ends with
/// the next [`TokenKind::ParagraphBreak`].
///
/// In the `[Token]` implementation both delimiting tokens are part of the
/// yielded slice, and a heading that is not followed by a paragraph break
/// runs to the end of the tokens.
fn iter_headings(&self) -> impl Iterator<Item = &'_ [Token]> + '_;
/// Get an iterator over token slices that represent the individual
/// sentences in a document.
fn iter_sentences(&self) -> impl Iterator<Item = &'_ [Token]> + '_;
@@ -139,6 +146,7 @@ impl TokenStringExt for [Token] {
create_fns_for!(verb);
create_fns_for!(word_like);
create_fns_for!(word);
create_fns_for!(heading_start);
fn first_non_whitespace(&self) -> Option<&Token> {
self.iter().find(|t| !t.kind.is_whitespace())
@@ -230,6 +238,17 @@ impl TokenStringExt for [Token] {
first_pg.into_iter().chain(rest).chain(last_pg)
}
/// Yields one slice per heading-start token.
///
/// Each slice begins at the heading start and extends through the first
/// paragraph break that follows it (inclusive). If there is no such break,
/// the slice extends to the end of the tokens.
fn iter_headings(&self) -> impl Iterator<Item = &'_ [Token]> + '_ {
    self.iter_heading_start_indices().map(|start| {
        let rest = &self[start..];

        // Number of tokens in the heading, counting the closing paragraph break.
        let len = match rest.iter().position(|t| t.kind.is_paragraph_break()) {
            Some(break_offset) => break_offset + 1,
            None => rest.len(),
        };

        &rest[..len]
    })
}
fn iter_sentences(&self) -> impl Iterator<Item = &'_ [Token]> + '_ {
let first_sentence = self
.iter_sentence_terminator_indices()

View File

@@ -302,6 +302,7 @@ fn format_tag(kind: &TokenKind) -> Cow<'static, str> {
TokenKind::Unlintable => Cow::Borrowed("Unlintable"),
TokenKind::Regexish => Cow::Borrowed("Regexish"),
TokenKind::ParagraphBreak => Cow::Borrowed("ParagraphBreak"),
TokenKind::HeadingStart => Cow::Borrowed("HeadingStart"),
}
}

View File

@@ -92,6 +92,8 @@ create_test!(issue_1988.md, 0, Dialect::American);
create_test!(issue_2054_clean.md, 0, Dialect::British);
create_test!(issue_1873.md, 0, Dialect::British);
create_test!(issue_2246.md, 0, Dialect::American);
create_test!(title_case_errors.md, 2, Dialect::American);
create_test!(title_case_clean.md, 0, Dialect::American);
create_test!(issue_2233.md, 0, Dialect::American);
create_test!(issue_2240.md, 0, Dialect::American);
// It just matters that it is > 1

View File

@@ -0,0 +1,7 @@
# Here, We Try to Test Our Title-Casing Feature
It should only pay attention to headings.
## Maybe It Works?
There will be a similar file with the corrected headings.

View File

@@ -0,0 +1,7 @@
# Here, we try to test our title-casing feature
It should only pay attention to headings.
## Maybe it works?
There will be a similar file with the corrected headings.

View File

@@ -1,4 +1,4 @@
# This is a big heading, with a lot of words
# This Is a Big Heading, with a Lot of Words
- New here's a list, this part doesn't have as many words
- But this part does, it has so many words, more words than you could ever dream of

View File

@@ -985,6 +985,15 @@ Suggest:
Lint: Capitalization (127 priority)
Message: |
558 | ## CHAPTER IV: The Rabbit Sends in a Little Bill
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Try to use title case in headings.
Suggest:
- Replace with: “## CHAPTER IV: The Rabbit Sends in a little Bill”
Lint: Readability (127 priority)
Message: |
564 | wonder?” Alice guessed in a moment that it was looking for the fan and the pair
@@ -1417,6 +1426,15 @@ Message: |
Lint: Capitalization (127 priority)
Message: |
796 | ## CHAPTER V: Advice from a Caterpillar
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Try to use title case in headings.
Suggest:
- Replace with: “## CHAPTER v: Advice from a Caterpillar”
Lint: Readability (127 priority)
Message: |
822 | “Well, perhaps you havent found it so yet,” said Alice; “but when you have to
@@ -1639,6 +1657,15 @@ Suggest:
Lint: Capitalization (127 priority)
Message: |
1059 | ## CHAPTER VI: Pig and Pepper
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Try to use title case in headings.
Suggest:
- Replace with: “## CHAPTER Vi: Pig and Pepper”
Lint: Readability (127 priority)
Message: |
1061 | For a minute or two she stood looking at the house, and wondering what to do

View File

@@ -1,3 +1,12 @@
Lint: Capitalization (127 priority)
Message: |
6 | # Computer science
| ^~~~~~~~~~~~~~~~~~ Try to use title case in headings.
Suggest:
- Replace with: “# Computer Science”
Lint: Style (31 priority)
Message: |
27 | problem-solving, decision-making, environmental adaptation, planning and
@@ -197,6 +206,15 @@ Suggest:
Lint: Capitalization (127 priority)
Message: |
102 | ## Etymology and scope
| ^~~~~~~~~~~~~~~~~~~~~~ Try to use title case in headings.
Suggest:
- Replace with: “## Etymology and Scope”
Lint: Readability (127 priority)
Message: |
104 | Although first proposed in 1956, the term "computer science" appears in a 1959
@@ -594,6 +612,15 @@ Suggest:
Lint: Capitalization (127 priority)
Message: |
178 | ### Epistemology of computer science
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Try to use title case in headings.
Suggest:
- Replace with: “### Epistemology of Computer Science”
Lint: Spelling (63 priority)
Message: |
181 | computer science is a discipline of science, mathematics, or engineering. Allen
@@ -682,6 +709,15 @@ Suggest:
Lint: Capitalization (127 priority)
Message: |
211 | ### Paradigms of computer science
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Try to use title case in headings.
Suggest:
- Replace with: “### Paradigms of Computer Science”
Lint: Spelling (63 priority)
Message: |
214 | separate paradigms in computer science. Peter Wegner argued that those paradigms
@@ -824,6 +860,24 @@ Suggest:
Lint: Capitalization (127 priority)
Message: |
243 | ### Theoretical computer science
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Try to use title case in headings.
Suggest:
- Replace with: “### Theoretical Computer Science”
Lint: Capitalization (127 priority)
Message: |
250 | #### Theory of computation
| ^~~~~~~~~~~~~~~~~~~~~~~~~~ Try to use title case in headings.
Suggest:
- Replace with: “#### Theory of Computation”
Lint: Spelling (63 priority)
Message: |
252 | According to Peter Denning, the fundamental question underlying computer science
@@ -855,6 +909,33 @@ Suggest:
Lint: Capitalization (127 priority)
Message: |
265 | #### Information and coding theory
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Try to use title case in headings.
Suggest:
- Replace with: “#### Information and Coding Theory”
Lint: Capitalization (127 priority)
Message: |
277 | #### Data structures and algorithms
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Try to use title case in headings.
Suggest:
- Replace with: “#### Data Structures and Algorithms”
Lint: Capitalization (127 priority)
Message: |
282 | #### Programming language theory and formal methods
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Try to use title case in headings.
Suggest:
- Replace with: “#### Programming Language Theory and Formal Methods”
Lint: Agreement (31 priority)
Message: |
286 | programming languages and their individual features. It falls within the
@@ -891,6 +972,33 @@ Message: |
Lint: Capitalization (127 priority)
Message: |
308 | ### Applied computer science
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~ Try to use title case in headings.
Suggest:
- Replace with: “### Applied Computer Science”
Lint: Capitalization (127 priority)
Message: |
310 | #### Computer graphics and visualization
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Try to use title case in headings.
Suggest:
- Replace with: “#### Computer Graphics and Visualization”
Lint: Capitalization (127 priority)
Message: |
318 | #### Image and sound processing
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Try to use title case in headings.
Suggest:
- Replace with: “#### Image and Sound Processing”
Lint: Style (31 priority)
Message: |
320 | Information can take the form of images, sound, video or other multimedia. Bits
@@ -920,6 +1028,15 @@ Suggest:
Lint: Capitalization (127 priority)
Message: |
330 | #### Computational science, finance and engineering
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Try to use title case in headings.
Suggest:
- Replace with: “#### Computational Science, Finance and Engineering”
Lint: Style (31 priority)
Message: |
330 | #### Computational science, finance and engineering
@@ -929,6 +1046,15 @@ Suggest:
Lint: Capitalization (127 priority)
Message: |
344 | #### Humancomputer interaction
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Try to use title case in headings.
Suggest:
- Replace with: “#### HumanComputer Interaction”
Lint: Spelling (63 priority)
Message: |
346 | Humancomputer interaction (HCI) is the field of study and research concerned
@@ -952,6 +1078,15 @@ Suggest:
Lint: Capitalization (127 priority)
Message: |
352 | #### Software engineering
| ^~~~~~~~~~~~~~~~~~~~~~~~~ Try to use title case in headings.
Suggest:
- Replace with: “#### Software Engineering”
Lint: Punctuation (31 priority)
Message: |
360 | maintenance. For example software testing, systems engineering, technical debt
@@ -961,6 +1096,33 @@ Suggest:
Lint: Capitalization (127 priority)
Message: |
363 | #### Artificial intelligence
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~ Try to use title case in headings.
Suggest:
- Replace with: “#### Artificial Intelligence”
Lint: Capitalization (127 priority)
Message: |
383 | ### Computer systems
| ^~~~~~~~~~~~~~~~~~~~ Try to use title case in headings.
Suggest:
- Replace with: “### Computer Systems”
Lint: Capitalization (127 priority)
Message: |
385 | #### Computer architecture and microarchitecture
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Try to use title case in headings.
Suggest:
- Replace with: “#### Computer Architecture and Microarchitecture”
Lint: Spelling (63 priority)
Message: |
393 | term "architecture" in computer literature can be traced to the work of Lyle R.
@@ -983,6 +1145,15 @@ Suggest:
Lint: Capitalization (127 priority)
Message: |
397 | #### Concurrent, parallel and distributed computing
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Try to use title case in headings.
Suggest:
- Replace with: “#### Concurrent, Parallel and Distributed Computing”
Lint: Spelling (63 priority)
Message: |
401 | mathematical models have been developed for general concurrent computation
@@ -995,6 +1166,24 @@ Suggest:
Lint: Capitalization (127 priority)
Message: |
408 | #### Computer networks
| ^~~~~~~~~~~~~~~~~~~~~~ Try to use title case in headings.
Suggest:
- Replace with: “#### Computer Networks”
Lint: Capitalization (127 priority)
Message: |
413 | #### Computer security and cryptography
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Try to use title case in headings.
Suggest:
- Replace with: “#### Computer Security and Cryptography”
Lint: WordChoice (127 priority)
Message: |
421 | Modern cryptography is the scientific study of problems relating to distributed
@@ -1005,6 +1194,15 @@ Suggest:
Lint: Capitalization (127 priority)
Message: |
427 | #### Databases and data mining
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Try to use title case in headings.
Suggest:
- Replace with: “#### Databases and Data Mining”
Lint: WordChoice (63 priority)
Message: |
432 | languages. Data mining is a process of discovering patterns in large data sets.
@@ -1149,6 +1347,15 @@ Suggest:
Lint: Capitalization (127 priority)
Message: |
471 | ## Programming paradigms
| ^~~~~~~~~~~~~~~~~~~~~~~~ Try to use title case in headings.
Suggest:
- Replace with: “## Programming Paradigms”
Lint: Punctuation (31 priority)
Message: |
490 | the data fields of the object with which they are associated. Thus

View File

@@ -1,3 +1,12 @@
Lint: Capitalization (127 priority)
Message: |
1 | # Difficult sentences
| ^~~~~~~~~~~~~~~~~~~~~ Try to use title case in headings.
Suggest:
- Replace with: “# Difficult Sentences”
Lint: Capitalization (31 priority)
Message: |
20 | at the bottom of the page; sitting at the table; at church; at sea

View File

@@ -1,3 +1,12 @@
Lint: Capitalization (127 priority)
Message: |
6 | # Part-of-speech tagging
| ^~~~~~~~~~~~~~~~~~~~~~~~ Try to use title case in headings.
Suggest:
- Replace with: “# Part-of-Speech Tagging”
Lint: Readability (127 priority)
Message: |
8 | In corpus linguistics, part-of-speech tagging (POS tagging or PoS tagging or
@@ -71,6 +80,15 @@ Message: |
Lint: Capitalization (127 priority)
Message: |
39 | ### Tag sets
| ^~~~~~~~~~~~ Try to use title case in headings.
Suggest:
- Replace with: “### Tag Sets”
Lint: Spelling (127 priority)
Message: |
43 | However, there are clearly many more categories and sub-categories. For nouns,
@@ -286,6 +304,15 @@ Message: |
Lint: Capitalization (127 priority)
Message: |
117 | ### Use of hidden Markov models
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Try to use title case in headings.
Suggest:
- Replace with: “### Use of Hidden Markov Models”
Lint: Spelling (63 priority)
Message: |
119 | In the mid-1980s, researchers in Europe began to use hidden Markov models (HMMs)
@@ -390,6 +417,15 @@ Suggest:
Lint: Capitalization (127 priority)
Message: |
157 | ### Dynamic programming methods
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Try to use title case in headings.
Suggest:
- Replace with: “### Dynamic Programming Methods”
Lint: Spelling (63 priority)
Message: |
159 | In 1987, Steven DeRose and Kenneth W. Church independently developed dynamic
@@ -499,6 +535,15 @@ Message: |
Lint: Capitalization (127 priority)
Message: |
182 | #### Unsupervised taggers
| ^~~~~~~~~~~~~~~~~~~~~~~~~ Try to use title case in headings.
Suggest:
- Replace with: “#### Unsupervised Taggers”
Lint: Spelling (127 priority)
Message: |
184 | The methods already discussed involve working from a pre-existing corpus to
@@ -509,6 +554,15 @@ Suggest:
Lint: Capitalization (127 priority)
Message: |
198 | #### Other taggers and methods
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Try to use title case in headings.
Suggest:
- Replace with: “#### Other Taggers and Methods”
Lint: Spelling (63 priority)
Message: |
200 | Some current major algorithms for part-of-speech tagging include the Viterbi

View File

@@ -1,3 +1,12 @@
Lint: Capitalization (127 priority)
Message: |
3 | # The Constitution Of The United States Of America
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Try to use title case in headings.
Suggest:
- Replace with: “# The Constitution of the United States of America”
Lint: Readability (127 priority)
Message: |
5 | **We the People** of the United States, in Order to form a more perfect Union,
@@ -1776,6 +1785,15 @@ Suggest:
Lint: Capitalization (127 priority)
Message: |
677 | ## Article. VI.
| ^~~~~~~~~~~~~~~ Try to use title case in headings.
Suggest:
- Replace with: “## Article. Vi.”
Lint: Readability (127 priority)
Message: |
683 | This Constitution, and the Laws of the United States which shall be made in

View File

@@ -4438,6 +4438,15 @@ Suggest:
Lint: Capitalization (127 priority)
Message: |
3003 | ## CHAPTER VI
| ^~~~~~~~~~~~~ Try to use title case in headings.
Suggest:
- Replace with: “## CHAPTER Vi”
Lint: Readability (127 priority)
Message: |
3020 | short of being news. Contemporary legends such as the “underground pipe-line to

View File

@@ -1,5 +1,5 @@
> Alices Adventures in Wonderland
# NSg$ NPl/V3 NPr/J/R/P NSg+
> Alices Adventures in Wonderland
# HeadingStart NSg$ NPl/V3 NPr/J/R/P NSg+
>
#
> by Lewis Carroll
@@ -10,8 +10,8 @@
# D NSg+ NSg NSg+ #
>
#
> CHAPTER I : Down the Rabbit - Hole
# NSg/VB+ ISg/#r+ . N🅪Sg/VB/J/P D NSg/VB+ . NSg/VB+
> CHAPTER I : Down the Rabbit - Hole
# HeadingStart NSg/VB+ ISg/#r+ . N🅪Sg/VB/J/P D NSg/VB+ . NSg/VB+
>
#
> Alice was beginning to get very tired of sitting by her sister on the bank , and
@@ -370,8 +370,8 @@
# NSg/I/J/R/C ISg+ NPr/VBP/J P N🅪Sg/VB . VB/C J/R J/R VP/J NSg/VB/J/P D+ N🅪Sg/VB+ .
>
#
> CHAPTER II : The Pool of Tears
# NSg/VB+ #r . D NSg/VB P NPl/V3+
> CHAPTER II : The Pool of Tears
# HeadingStart NSg/VB+ #r . D NSg/VB P NPl/V3+
>
#
> “ Curiouser and curiouser ! ” cried Alice ( she was so much surprised , that for the
@@ -728,8 +728,8 @@
# NSg/VB/J+ VPt P D+ NSg/VB+ .
>
#
> CHAPTER III : A Caucus - Race and a Long Tale
# NSg/VB+ #r . D/P NSg/VB+ . N🅪Sg/VB VB/C D/P+ NPr/VB/J+ NSg/VB+
> CHAPTER III : A Caucus - Race and a Long Tale
# HeadingStart NSg/VB+ #r . D/P NSg/VB+ . N🅪Sg/VB VB/C D/P NPr/VB/J NSg/VB+
>
#
> They were indeed a queer - looking party that assembled on the bank — the birds with
@@ -1114,8 +1114,8 @@
# NSg/VB+ . VB/C VPt Nᴹ/Vg/J NSg/VB/J P NSg/VB ISg/D$+ NSg/VB+ .
>
#
> CHAPTER IV : The Rabbit Sends in a Little Bill
# NSg/VB+ NSg/J/#r+ . D+ NSg/VB+ NPl/V3 NPr/J/R/P D/P+ NPr/I/J/Dq+ NPr/VB+
> CHAPTER IV : The Rabbit Sends in a Little Bill
# HeadingStart NSg/VB+ NSg/J/#r+ . D NSg/VB+ NPl/V3 NPr/J/R/P D/P NPr/I/J/Dq NPr/VB+
>
#
> It was the White Rabbit , trotting slowly back again , and looking anxiously about
@@ -1590,8 +1590,8 @@
# D JS NSg/VB P ISg/D$+ NPr/C P NSg/I/VB+ NSg/J/C .
>
#
> CHAPTER V : Advice from a Caterpillar
# NSg/VB+ NSg/P/#r . Nᴹ+ P D/P NSg/VB
> CHAPTER V : Advice from a Caterpillar
# HeadingStart NSg/VB+ NSg/P/#r . Nᴹ+ P D/P NSg/VB
>
#
> The Caterpillar and Alice looked at each other for some time in silence : at last
@@ -2116,8 +2116,8 @@
# VB VP ISg+ N🅪Sg/VB/J/P P NSg NPl/V3+ NSg/VB/J/R .
>
#
> CHAPTER VI : Pig and Pepper
# NSg/VB+ NPr/#r . NSg/VB VB/C N🅪Sg/VB+
> CHAPTER VI : Pig and Pepper
# HeadingStart NSg/VB+ NPr/#r . NSg/VB VB/C N🅪Sg/VB+
>
#
> For a minute or two she stood looking at the house , and wondering what to do
@@ -2706,8 +2706,8 @@
# P NSg/I/J/C/Dq . ISg/#r+ R NSg/VB K VPp/J/P P NSg/VB D NSg/VB R . .
>
#
> CHAPTER VII : A Mad Tea - Party
# NSg/VB+ NSg/#r . D/P NSg/VB/J N🅪Sg/VB+ . NSg/VB/J+
> CHAPTER VII : A Mad Tea - Party
# HeadingStart NSg/VB+ NSg/#r . D/P NSg/VB/J N🅪Sg/VB+ . NSg/VB/J+
>
#
> There was a table set out under a tree in front of the house , and the March Hare
@@ -3334,8 +3334,8 @@
# NPr/VB/J NSg/VB+ . NPl/V3+ VB/C D NSg/VB/J NPl/V3 .
>
#
> CHAPTER VIII : The Queens Croquet - Ground
# NSg/VB+ #r . D NSg$ NSg/VB . N🅪Sg/VB/J+
> CHAPTER VIII : The Queens Croquet - Ground
# HeadingStart NSg/VB+ #r . D NSg$ NSg/VB . N🅪Sg/VB/J+
>
#
> A large rose - tree stood near the entrance of the garden : the roses growing on it
@@ -3896,8 +3896,8 @@
# NSg/VPt NSg/VB/J P D NSg/VB/J+ .
>
#
> CHAPTER IX : The Mock Turtles Story
# NSg/VB+ #r . D NSg/VB/J NSg$ NSg/VB+
> CHAPTER IX : The Mock Turtles Story
# HeadingStart NSg/VB+ #r . D NSg/VB/J NSg$ NSg/VB+
>
#
> “ You cant think how glad I am to see you again , you dear old thing ! ” said the
@@ -4486,8 +4486,8 @@
# . NPr/VB ISg/D$+ NSg/I/J+ J/P D NPl/V3+ NSg/J/R/C . .
>
#
> CHAPTER X : The Lobster Quadrille
# NSg/VB+ NPr/J/#r+ . D+ NSg/VB/J+ NSg/VB/J
> CHAPTER X : The Lobster Quadrille
# HeadingStart NSg/VB+ NPr/J/#r+ . D+ NSg/VB/J+ NSg/VB/J
>
#
> The Mock Turtle sighed deeply , and drew the back of one flapper across his eyes .
@@ -5008,8 +5008,8 @@
# . ? . Nᴹ P D NPr/I+ . NPr/I+ . N🅪Sg/Vg/J+ . NSg/J . NSg/J N🅪Sg/VB+ . .
>
#
> CHAPTER XI : Who Stole the Tarts ?
# NSg/VB+ NSg/#r . NPr/I+ NSg/VPt D NPl/V3 .
> CHAPTER XI : Who Stole the Tarts ?
# HeadingStart NSg/VB+ NSg/#r . NPr/I+ NSg/VPt D NPl/V3 .
>
#
> The King and Queen of Hearts were seated on their throne when they arrived , with
@@ -5488,8 +5488,8 @@
# NSg/P D NSg/VB/J P ISg/D$+ NSg/VB/J NPr/I/J/Dq+ NSg/VB+ . D+ NSg/VB+ . NPr+ . .
>
#
> CHAPTER XII : Alices Evidence
# NSg/VB+ #r . NSg$ Nᴹ/VB+
> CHAPTER XII : Alices Evidence
# HeadingStart NSg/VB+ #r . NSg$ Nᴹ/VB+
>
#
> “ Here ! ” cried Alice , quite forgetting in the flurry of the moment how large she

View File

@@ -6,8 +6,8 @@
# Unlintable Unlintable
> -->
# Unlintable Unlintable
> Computer science
# Unlintable NSg/VB+ N🅪Sg/VB+
> Computer science
# Unlintable HeadingStart NSg/VB+ N🅪Sg/VB+
>
#
> Computer science is the study of computation , information , and automation .
@@ -66,8 +66,8 @@
# N🅪Sg NPr/J/R/P NSg/VB+ N🅪Sg/VB+ .
>
#
> History
# N🅪Sg+
> History
# HeadingStart N🅪Sg+
>
#
> The earliest foundations of what would become computer science predate the
@@ -198,8 +198,8 @@
# VB/J NPl P NSg/VB+ NPr/J/R/P D$+ NSg/VB/J+ NPl/V3+ .
>
#
> Etymology and scope
# N🅪Sg VB/C NSg/VB+
> Etymology and scope
# HeadingStart N🅪Sg VB/C NSg/VB+
>
#
> Although first proposed in 1956 , the term " computer science " appears in a 1959
@@ -346,12 +346,12 @@
# NSg/C NSg/R/C NSg/P NSg/I/J/C/Dq Nᴹ/VB+ .
>
#
> Philosophy
# N🅪Sg/VB+
> Philosophy
# HeadingStart N🅪Sg/VB+
>
#
> Epistemology of computer science
# Nᴹ P NSg/VB+ N🅪Sg/VB+
> Epistemology of computer science
# HeadingStart Nᴹ P NSg/VB+ N🅪Sg/VB+
>
#
> Despite the word science in its name , there is debate over whether or not
@@ -416,8 +416,8 @@
# Nᴹ/Vg/J+ NPl/V3+ NSg/R J J NPl+ .
>
#
> Paradigms of computer science
# NPl P NSg/VB+ N🅪Sg/VB+
> Paradigms of computer science
# HeadingStart NPl P NSg/VB+ N🅪Sg/VB+
>
#
> A number of computer scientists have argued for the distinction of three
@@ -448,8 +448,8 @@
# NSg/VB/J . VB Nᴹ/Vg/J+ NPl+ .
>
#
> Fields
# NPrPl/V3+
> Fields
# HeadingStart NPrPl/V3+
>
#
> As a discipline , computer science spans a range of topics from theoretical
@@ -480,8 +480,8 @@
# NSg NSg/R N🅪Sg/Vg/J/C J NPl P NSg/VB+ N🅪Sg/VB+ .
>
#
> Theoretical computer science
# J+ NSg/VB+ N🅪Sg/VB+
> Theoretical computer science
# HeadingStart J+ NSg/VB+ N🅪Sg/VB+
>
#
> Theoretical computer science is mathematical and abstract in spirit , but it
@@ -494,8 +494,8 @@
# N🅪Sg/Vg/J+ . VB NPr/I/J/R/Dq NSg/J NPl .
>
#
> Theory of computation
# N🅪Sg P NSg
> Theory of computation
# HeadingStart N🅪Sg P NSg
>
#
> According to Peter Denning , the fundamental question underlying computer science
@@ -524,8 +524,8 @@
# NSg/J NPr/J/R/P D N🅪Sg P NSg .
>
#
> Information and coding theory
# Nᴹ VB/C Nᴹ/Vg/J+ N🅪Sg+
> Information and coding theory
# HeadingStart Nᴹ VB/C Nᴹ/Vg/J+ N🅪Sg+
>
#
> Information theory , closely related to probability and statistics , is related to
@@ -548,8 +548,8 @@
# NSg/J VB/C NSg/J+ N🅪Pl+ N🅪Sg+ NPl/V3+ .
>
#
> Data structures and algorithms
# N🅪Pl+ NPl/V3 VB/C NPl+
> Data structures and algorithms
# HeadingStart N🅪Pl+ NPl/V3 VB/C NPl+
>
#
> Data structures and algorithms are the studies of commonly used computational
@@ -558,8 +558,8 @@
# NPl/V3 VB/C D$+ J+ N🅪Sg+ .
>
#
> Programming language theory and formal methods
# Nᴹ/Vg/J+ N🅪Sg/VB+ N🅪Sg VB/C NSg/J+ NPl/V3+
> Programming language theory and formal methods
# HeadingStart Nᴹ/Vg/J+ N🅪Sg/VB+ N🅪Sg VB/C NSg/J NPl/V3+
>
#
> Programming language theory is a branch of computer science that deals with the
@@ -610,12 +610,12 @@
# NPl NPr/J/R/P Nᴹ VB/C Nᴹ+ NSg VB/C N🅪Sg+ .
>
#
> Applied computer science
# VP/J NSg/VB+ N🅪Sg/VB+
> Applied computer science
# HeadingStart VP/J NSg/VB+ N🅪Sg/VB+
>
#
> Computer graphics and visualization
# NSg/VB+ NPl VB/C NSg+
> Computer graphics and visualization
# HeadingStart NSg/VB+ NPl VB/C NSg+
>
#
> Computer graphics is the study of digital visual contents and involves the
@@ -630,8 +630,8 @@
# VB/C N🅪Sg/VB+ NPl/V3+ .
>
#
> Image and sound processing
# N🅪Sg/VB VB/C N🅪Sg/VB/J+ Nᴹ/Vg/J+
> Image and sound processing
# HeadingStart N🅪Sg/VB VB/C N🅪Sg/VB/J+ Nᴹ/Vg/J+
>
#
> Information can take the form of images , sound , video or other multimedia . Bits
@@ -654,8 +654,8 @@
# NSg/I/J P D VP/J NPl NPr/J/R/P J+ NSg/VB+ N🅪Sg/VB+ .
>
#
> Computational science , finance and engineering
# J N🅪Sg/VB+ . N🅪Sg/VB VB/C Nᴹ/Vg/J+
> Computational science , finance and engineering
# HeadingStart J N🅪Sg/VB+ . N🅪Sg/VB VB/C Nᴹ/Vg/J+
>
#
> Scientific computing ( or computational science ) is the field of study concerned
@@ -682,8 +682,8 @@
# NPl/V3 .
>
#
> Human computer interaction
# NSg/VB/J . NSg/VB+ N🅪Sg+
> Human computer interaction
# HeadingStart NSg/VB/J . NSg/VB+ N🅪Sg+
>
#
> Human computer interaction ( HCI ) is the field of study and research concerned
@@ -698,8 +698,8 @@
# NSg P NPl/V3+ .
>
#
> Software engineering
# Nᴹ+ Nᴹ/Vg/J+
> Software engineering
# HeadingStart Nᴹ+ Nᴹ/Vg/J+
>
#
> Software engineering is the study of designing , implementing , and modifying the
@@ -720,8 +720,8 @@
# VB/C Nᴹ+ N🅪Sg+ NPl/V3+ .
>
#
> Artificial intelligence
# J+ N🅪Sg+
> Artificial intelligence
# HeadingStart J+ N🅪Sg+
>
#
> Artificial intelligence ( AI ) aims to or is required to synthesize
@@ -760,12 +760,12 @@
# N🅪Pl+ .
>
#
> Computer systems
# NSg/VB+ NPl+
> Computer systems
# HeadingStart NSg/VB+ NPl+
>
#
> Computer architecture and microarchitecture
# NSg/VB+ N🅪Sg+ VB/C NSg
> Computer architecture and microarchitecture
# HeadingStart NSg/VB+ N🅪Sg+ VB/C NSg
>
#
> Computer architecture , or digital computer organization , is the conceptual
@@ -788,8 +788,8 @@
# NSg+ NPr/J/R/P NSg$ NSg/VB/J+ Nᴹ/VB+ NSg/VB/J+ NPr/J/R/P # .
>
#
> Concurrent , parallel and distributed computing
# NSg/J . NSg/VB/J VB/C VP/J Nᴹ/Vg/J+
> Concurrent , parallel and distributed computing
# HeadingStart NSg/J . NSg/VB/J VB/C VP/J Nᴹ/Vg/J+
>
#
> Concurrency is a property of systems in which several computations are executing
@@ -810,8 +810,8 @@
# VP/J P VB NSg/VB/J+ NPl/V3+ .
>
#
> Computer networks
# NSg/VB+ NPl/V3+
> Computer networks
# HeadingStart NSg/VB+ NPl/V3+
>
#
> This branch of computer science aims to manage networks between computers
@@ -820,8 +820,8 @@
# J .
>
#
> Computer security and cryptography
# NSg/VB+ Nᴹ+ VB/C Nᴹ
> Computer security and cryptography
# HeadingStart NSg/VB+ Nᴹ+ VB/C Nᴹ
>
#
> Computer security is a branch of computer technology with the objective of
@@ -848,8 +848,8 @@
# VP/J NPl/V3 .
>
#
> Databases and data mining
# NPl/V3 VB/C N🅪Pl+ Nᴹ/Vg/J+
> Databases and data mining
# HeadingStart NPl/V3 VB/C N🅪Pl+ Nᴹ/Vg/J+
>
#
> A database is intended to organize , store , and retrieve large amounts of data
@@ -862,8 +862,8 @@
# NPl/V3+ . N🅪Pl+ Nᴹ/Vg/J+ VL3 D/P NSg/VB P Nᴹ/Vg/J NPl/V3+ NPr/J/R/P NSg/J N🅪Pl+ NPl/V3 .
>
#
> Discoveries
# NPl+
> Discoveries
# HeadingStart NPl+
>
#
> The philosopher of computing Bill Rapaport noted three Great Insights of
@@ -976,8 +976,8 @@
#
>
#
> Programming paradigms
# Nᴹ/Vg/J+ NPl+
> Programming paradigms
# HeadingStart Nᴹ/Vg/J+ NPl+
>
#
> Programming languages can be used to accomplish different tasks in different
@@ -1044,8 +1044,8 @@
# D/P N🅪Sg/VB P NSg/VB+ C/P P NSg/J+ NPl+ .
>
#
> Research
# Nᴹ/VB+
> Research
# HeadingStart Nᴹ/VB+
>
#
> Conferences are important events for computer science research . During these

View File

@@ -1,5 +1,5 @@
> Difficult sentences
# VB/J+ NPl/V3+
> Difficult sentences
# HeadingStart VB/J+ NPl/V3+
>
#
> A collection of difficult sentences to test Harper's ability to correctly tag unusual / uncommon but correct sentences .
@@ -14,8 +14,8 @@
# NSg/I/J/R/Dq NSg/VB+ NPl/V3+ VB VPp/J P Url NSg/VB+ . NSg/VB/#r+ NSg/J/P . NPr/VB/J+ # .
>
#
> A
# D/P
> A
# HeadingStart D/P
>
#
> With one attack , he was torn a pieces .
@@ -24,12 +24,12 @@
# ISg/#r+ NSg/VB D$+ NPl+ R D/P+ NPr🅪Sg+ .
>
#
> At
# NSg/P
> At
# HeadingStart NSg/P
>
#
> Preposition
# NSg/VB
> Preposition
# HeadingStart NSg/VB
>
#
> Caesar was at Rome ; a climate treaty was signed at Kyoto in 1997 .
@@ -68,28 +68,28 @@
# ISg+ VL3 NSg/P NPl VB/C NPl P ISg+ .
>
#
> Noun
# NSg/VB+
> Noun
# HeadingStart NSg/VB+
>
#
> The at sign .
# D NSg/P NSg/VB+ .
>
#
> Verb
# NSg/VB+
> Verb
# HeadingStart NSg/VB+
>
#
> ( In online chats : ) Don't @ me ! Don't at me !
# . NPr/J/R/P VB/J+ NPl/V3+ . . VB . NPr/ISg+ . VB NSg/P NPr/ISg+ .
>
#
> By
# NSg/J/P
> By
# HeadingStart NSg/J/P
>
#
> Preposition
# NSg/VB
> Preposition
# HeadingStart NSg/VB
>
#
> The mailbox is by the bus stop .
@@ -176,8 +176,8 @@
# NSg/J/P ? . + NSg/J . . P . P .
>
#
> Adverb
# NSg/VB+
> Adverb
# HeadingStart NSg/VB+
>
#
> I watched the parade as it passed by .
@@ -192,8 +192,8 @@
# D+ NPl+ VB/J NSg/I/J/R/Dq N🅪Sg/VB/J+ P NSg/VB+ Nᴹ/Vg/J NPl/V3+ NSg/J/P R/C/P N🅪Sg/VB VB/C N🅪Sg/VB+ .
>
#
> Adjective
# NSg/VB/J+
> Adjective
# HeadingStart NSg/VB/J+
>
#
> a by path ; a by room ( Out of the way , off to one side . )
@@ -202,20 +202,20 @@
# NSg/J/P NSg/VB . D/P NSg/J/P NSg/VB . NSg/J+ . NSg/J . .
>
#
> For
# R/C/P
> For
# HeadingStart R/C/P
>
#
> Conjunction
# NSg/VB+
> Conjunction
# HeadingStart NSg/VB+
>
#
> I had to stay with my wicked stepmother , for I had nowhere else to go .
# ISg/#r+ VB P NSg/VB/J P D$+ VP/J NSg . R/C/P ISg/#r+ VB NSg/J NSg/J/C P NSg/VB/J .
>
#
> Preposition
# NSg/VB
> Preposition
# HeadingStart NSg/VB
>
#
> The astronauts headed for the moon .
@@ -320,8 +320,8 @@
# P NSg/VB R/C/P NSg$+ NSg+ .
>
#
> From
# P
> From
# HeadingStart P
>
#
> Paul is from New Zealand .
@@ -364,12 +364,12 @@
# NPr/ISg+ NPl/V3 NPr/VB/J P NSg/VB/J/R .
>
#
> In
# NPr/J/R/P
> In
# HeadingStart NPr/J/R/P
>
#
> Preposition
# NSg/VB
> Preposition
# HeadingStart NSg/VB
>
#
> Who lives in a pineapple under the sea ?
@@ -450,16 +450,16 @@
# NSg/J+ NPl/V3+ VXB NSg/VXB NSg/J NPr/J/R/P N🅪Sg/I/VB+ . NSg/C/P NSg/R/C VP/J .
>
#
> Verb
# NSg/VB+
> Verb
# HeadingStart NSg/VB+
>
#
> He that ears my land spares my team and gives me leave to in the crop .
# NPr/ISg+ NSg/I/C/Ddem+ NPl/V3+ D$+ NPr🅪Sg/VB+ NPl/V3 D$+ NSg/VB+ VB/C NPl/V3 NPr/ISg+ NSg/VB P NPr/J/R/P D NSg/VB+ .
>
#
> Adverb
# NSg/VB+
> Adverb
# HeadingStart NSg/VB+
>
#
> Suddenly a strange man walked in .
@@ -478,16 +478,16 @@
# D NSg/VB NSg/VB/J VB VBPp Vg/J # NPl/V3+ NPr/J/R/P .
>
#
> Noun
# NSg/VB+
> Noun
# HeadingStart NSg/VB+
>
#
> His parents got him an in with the company .
# ISg/D$+ NPl/V3+ VP ISg+ D/P NPr/J/R/P P D+ N🅪Sg/VB+ .
>
#
> Adjective
# NSg/VB/J+
> Adjective
# HeadingStart NSg/VB/J+
>
#
> Is Mr . Smith in ?
@@ -522,8 +522,8 @@
# ISg/#r+ VPt # NPl NPr/J/R/P NSg/I/C D+ N🅪Sg/VB/J+ VP/J .
>
#
> Unit
# NSg+
> Unit
# HeadingStart NSg+
>
#
> The glass is 8 inches .
@@ -532,8 +532,8 @@
# D+ NPr🅪Sg/VB+ VL3 # NPr/J/R/P .
>
#
> Of
# P
> Of
# HeadingStart P
>
#
> Take the chicken out of the freezer .
@@ -582,12 +582,12 @@
# P D/P NSg/VB/J P NSg+ NPl+ . D+ NSg/VB/J+ R VPt NSg/VB/J/P .
>
#
> On
# J/P
> On
# HeadingStart J/P
>
#
> Adjective
# NSg/VB/J+
> Adjective
# HeadingStart NSg/VB/J+
>
#
> All the lights are on , so they must be home .
@@ -624,8 +624,8 @@
# NPr/ISg+ R V3 P NSg/VXB J/P . + NSg/I/J/R/C Nᴹ/Vg/J .
>
#
> Adverb
# NSg/VB+
> Adverb
# HeadingStart NSg/VB+
>
#
> turn the television on
@@ -648,8 +648,8 @@
# NSg+ NPl+ J/P . NSg/I/J+ VB VP/J NPr/J/R/P D+ NSg+ .
>
#
> Preposition
# NSg/VB
> Preposition
# HeadingStart NSg/VB
>
#
> A vase of flowers stood on the table .
@@ -768,20 +768,20 @@
# VB VB NPr/VB J/P ISg/D$+ VB/C NSg/VB ISg/D$+ NPr/J/R/P N🅪Sg/VB+ .
>
#
> Verb
# NSg/VB+
> Verb
# HeadingStart NSg/VB+
>
#
> Can you on the light ? ( switch on )
# NPr/VXB ISgPl+ J/P D+ N🅪Sg/VB/J+ . . NSg/VB/J+ J/P .
>
#
> To
# P
> To
# HeadingStart P
>
#
> Particle
# NSg+
> Particle
# HeadingStart NSg+
>
#
> I want to leave .
@@ -804,8 +804,8 @@
# ISg/#r+ NSg/VPt P D+ NPl/V3+ P NSg/VB I/J/R/Dq+ N🅪Sg/VB+ .
>
#
> Preposition
# NSg/VB
> Preposition
# HeadingStart NSg/VB
>
#
> She looked to the heavens .
@@ -852,20 +852,20 @@
# NSg$ D+ N🅪Sg/VB/J+ . . + NSg/VB/J+ P NSg NPr/J/R/P D+ N🅪Sg+ . NPr/C # . # NSg/VB+ . .
>
#
> Adverb
# NSg/VB+
> Adverb
# HeadingStart NSg/VB+
>
#
> Please push the door to . ( close )
# VB NSg/VB D+ NSg/VB+ P . . NSg/VB/J .
>
#
> With
# P
> With
# HeadingStart P
>
#
> Preposition
# NSg/VB
> Preposition
# HeadingStart NSg/VB
>
#
> He picked a fight with the class bully .
@@ -930,8 +930,8 @@
# NSg/I/C/Ddem+ VPt D/P NPr/VB P VB . VB ISgPl+ NSg/VB/J P NPr/ISg+ .
>
#
> Adverb
# NSg/VB+
> Adverb
# HeadingStart NSg/VB+
>
#
> Do you want to come with ?

View File

@@ -6,8 +6,8 @@
# Unlintable Unlintable
> -->
# Unlintable Unlintable
> Part - of - speech tagging
# Unlintable NSg/VB/J+ . P . N🅪Sg/VB+ NSg/Vg
> Part - of - speech tagging
# Unlintable HeadingStart NSg/VB/J+ . P . N🅪Sg/VB+ NSg/Vg
>
#
> In corpus linguistics , part - of - speech tagging ( POS tagging or PoS tagging or
@@ -36,8 +36,8 @@
# NSg/VB/J VB/C NSg/I/J/R/Dq R VP/J NPr🅪Sg/VB/J+ NSg+ . NPl . NPl/V3 NSg/VB+ . VP/J NPl+ .
>
#
> Principle
# N🅪Sg/VB+
> Principle
# HeadingStart N🅪Sg/VB+
>
#
> Part - of - speech tagging is harder than just having a list of words and their
@@ -72,8 +72,8 @@
# J NSg/VB+ . R . . .
>
#
> Tag sets
# NSg/VB+ NPl/V3
> Tag sets
# HeadingStart NSg/VB+ NPl/V3
>
#
> Schools commonly teach that there are 9 parts of speech in English : noun , verb ,
@@ -156,12 +156,12 @@
# J/P D N🅪Sg/VB+ NSg/P NSg/VB+ . NSg/J NSg/Vg VL3 NSg/JC J/P NSg/JC NSg/VB+ . NPl/V3 .
>
#
> History
# N🅪Sg+
> History
# HeadingStart N🅪Sg+
>
#
> The Brown Corpus
# D+ NPr🅪Sg/VB/J+ NSg+
> The Brown Corpus
# HeadingStart D+ NPr🅪Sg/VB/J NSg+
>
#
> Research on part - of - speech tagging has been closely tied to corpus linguistics .
@@ -228,8 +228,8 @@
# NPl+ NSg/VB NSg/VXB VP/J R/C/P Dq+ NSg/VB+ .
>
#
> Use of hidden Markov models
# N🅪Sg/VB P VB/J NPr NPl/V3+
> Use of hidden Markov models
# HeadingStart N🅪Sg/VB P VB/J NPr NPl/V3+
>
#
> In the mid - 1980s , researchers in Europe began to use hidden Markov models ( HMMs )
@@ -308,8 +308,8 @@
# NSg .
>
#
> Dynamic programming methods
# NSg/J+ Nᴹ/Vg/J+ NPl/V3+
> Dynamic programming methods
# HeadingStart NSg/J+ Nᴹ/Vg/J+ NPl/V3+
>
#
> In 1987 , Steven DeRose and Kenneth W. Church independently developed dynamic
@@ -358,8 +358,8 @@
# NSg/VB/J+ . P . N🅪Sg/VB+ NSg+ .
>
#
> Unsupervised taggers
# VB/J NPl
> Unsupervised taggers
# HeadingStart VB/J NPl
>
#
> The methods already discussed involve working from a pre - existing corpus to
@@ -390,8 +390,8 @@
# J NPl/V3+ .
>
#
> Other taggers and methods
# NSg/VB/J NPl VB/C NPl/V3+
> Other taggers and methods
# HeadingStart NSg/VB/J NPl VB/C NPl/V3+
>
#
> Some current major algorithms for part - of - speech tagging include the Viterbi

View File

@@ -1,5 +1,5 @@
> Spell
# NSg/VB
> Spell
# HeadingStart NSg/VB
>
#
> This document contains a list of words spelled correctly in some dialects of English , but not American English . This is designed to test the spelling suggestions we give for such mistakes .
@@ -10,8 +10,8 @@
# P VB I/Ddem+ . D NSg P I/Ddem NSg/VB+ V3 Unlintable . I/C+ NPr/VXB NPr/VB D NSg/VB+ NSg P N🅪Sg/VB D NPr/J NSg+ . NPr/VB/J/R C/P Nᴹ/Vg/J P N🅪Sg/VB D/P R VP/J NSg+ .
>
#
> Words
# NPl/V3+
> Words
# HeadingStart NPl/V3+
>
#
>

View File

@@ -1,13 +1,13 @@
> Spell
# NSg/VB
> Spell
# HeadingStart NSg/VB
>
#
> This document contains example sentences with misspelled words that we want to test the spell checker on .
# I/Ddem+ NSg/VB+ V3 NSg/VB+ NPl/V3+ P VP/J NPl/V3+ NSg/I/C/Ddem+ IPl+ NSg/VB P NSg/VB D NSg/VB NSg/VB J/P .
>
#
> Example Sentences
# NSg/VB+ NPl/V3+
> Example Sentences
# HeadingStart NSg/VB+ NPl/V3+
>
#
> My favourite color is blu .

View File

@@ -1,13 +1,13 @@
> Swears
# NPl/V3
> Swears
# HeadingStart NPl/V3
>
#
> This documents tests that different forms / variations of swears are tagged as such .
# I/Ddem+ NPl/V3+ NPl/V3+ NSg/I/C/Ddem NSg/J+ NPl/V3+ . NPl P NPl/V3 VB VP/J NSg/R NSg/I .
>
#
> Examples
# NPl/V3+
> Examples
# HeadingStart NPl/V3+
>
#
> One turd , two turds .

View File

@@ -1,7 +1,7 @@
> <!-- source: https://github.com/JesseKPhillips/USA-Constitution/blob/4cfdd130709fa7e8db998383b6917ba33b402ec6/Constitution.md -->
# Unlintable
> The Constitution Of The United States Of America
# Unlintable D NPr+ P D VP/J NPrPl/V3+ P NPr+
> The Constitution Of The United States Of America
# Unlintable HeadingStart D NPr+ P D VP/J NPrPl/V3 P NPr+
>
#
> We the People of the United States , in Order to form a more perfect Union ,
@@ -16,12 +16,12 @@
# NPrPl/V3 P NPr+ .
>
#
> Article . I.
# NSg/VB+ . ?
> Article . I.
# HeadingStart NSg/VB+ . ?
>
#
> Section . 1 .
# NSg/VB+ . # .
> Section . 1 .
# HeadingStart NSg/VB+ . # .
>
#
> All legislative Powers herein granted shall be vested in a
@@ -66,8 +66,8 @@
# NPl+ VXB NSg/J/R/C NSg/VB .
>
#
> Section . 2 .
# NSg/VB+ . # .
> Section . 2 .
# HeadingStart NSg/VB+ . # .
>
#
> The House of Representatives shall be composed of Members
@@ -144,8 +144,8 @@
# VXB NSg/VXB D NSg/VB/J N🅪Sg/VB/J+ P N🅪Sg .
>
#
> Section . 3 .
# NSg/VB+ . # .
> Section . 3 .
# HeadingStart NSg/VB+ . # .
>
#
> The Senate of the United States shall be composed of two
@@ -226,8 +226,8 @@
# N🅪Sg/VB .
>
#
> Section . 4 .
# NSg/VB+ . # .
> Section . 4 .
# HeadingStart NSg/VB+ . # .
>
#
> The Times , Places and Manner of holding Elections for Senators
@@ -248,8 +248,8 @@
# NSg/J NPr🅪Sg+ .
>
#
> Section . 5 .
# NSg/VB+ . # .
> Section . 5 .
# HeadingStart NSg/VB+ . # .
>
#
> Each House shall be the Judge of the Elections , Returns and
@@ -288,8 +288,8 @@
# NPr/J/R/P I/C+ D NSg NPl/V3+ VXB NSg/VXB NSg/Vg/J .
>
#
> Section . 6 .
# NSg/VB+ . # .
> Section . 6 .
# HeadingStart NSg/VB+ . # .
>
#
> The Senators and Representatives shall receive a Compensation
@@ -326,8 +326,8 @@
# NSg/VXB VP/J .
>
#
> Section . 7 .
# NSg/VB+ . # .
> Section . 7 .
# HeadingStart NSg/VB+ . # .
>
#
> All Bills for raising Revenue shall originate in the House of
@@ -384,8 +384,8 @@
# P D NPl/V3 VB/C NPl+ VP/J NPr/J/R/P D NPr🅪Sg/VB P D/P NPr/VB+ .
>
#
> Section . 8 .
# NSg/VB+ . # .
> Section . 8 .
# HeadingStart NSg/VB+ . # .
>
#
> The Congress shall have Power To lay and collect Taxes , Duties ,
@@ -582,8 +582,8 @@
#
>
#
> Section . 9 .
# NSg/VB+ . # .
> Section . 9 .
# HeadingStart NSg/VB+ . # .
>
#
> The Migration or Importation of such Persons as any of the
@@ -662,8 +662,8 @@
# N🅪Sg/VB NPr/C NSg/VB/J N🅪Sg/VB+ .
>
#
> Section . 10 .
# NSg/VB+ . # .
> Section . 10 .
# HeadingStart NSg/VB+ . # .
>
#
> No State shall enter into any Treaty , Alliance , or
@@ -702,12 +702,12 @@
# VP/J . NPr/C NPr/J/R/P NSg/I J N🅪Sg/VB/JC+ NSg/R NPr/VXB NSg/R/C VB P NSg/VB/J+ .
>
#
> Article . II .
# NSg/VB+ . #r .
> Article . II .
# HeadingStart NSg/VB+ . #r .
>
#
> Section . 1 .
# NSg/VB+ . # .
> Section . 1 .
# HeadingStart NSg/VB+ . # .
>
#
> The executive Power shall be vested in a President of the
@@ -732,8 +732,8 @@
# NPrPl/V3+ . VXB NSg/VXB VP/J D/P NSg .
>
#
> SubSection . 1 .
# NSg/VB+ . # .
> SubSection . 1 .
# HeadingStart NSg/VB+ . # .
>
#
> The Electors shall meet in their respective states , and vote
@@ -828,8 +828,8 @@
# VP/J NPrPl/V3+ .
>
#
> SubSection . 2
# NSg/VB+ . #
> SubSection . 2
# HeadingStart NSg/VB+ . #
>
#
> No Person except a natural born Citizen , or a Citizen of the
@@ -864,8 +864,8 @@
# Nᴹ/Vg/J NSg/R NSg/VB+ VB/P D NSg/VB/J P NSg/I+ NSg/VB/J+ .
>
#
> SubSection 3 .
# NSg/VB+ # .
> SubSection 3 .
# HeadingStart NSg/VB+ # .
>
#
> In case of the removal of the President from office or of his
@@ -942,8 +942,8 @@
# VB/C NPl P ISg/D$+ NSg/VB+ .
>
#
> SubSection 4 .
# NSg/VB+ # .
> SubSection 4 .
# HeadingStart NSg/VB+ # .
>
#
> The President shall , at stated Times , receive for his
@@ -970,8 +970,8 @@
# NPrPl/V3+ . .
>
#
> SubSection 5 .
# NSg/VB+ # .
> SubSection 5 .
# HeadingStart NSg/VB+ # .
>
#
> The District constituting the seat of Government of the
@@ -998,8 +998,8 @@
# NPr+ .
>
#
> Section . 2 .
# NSg/VB+ . # .
> Section . 2 .
# HeadingStart NSg/VB+ . # .
>
#
> The President shall be Commander in Chief of the Army and Navy
@@ -1054,8 +1054,8 @@
# N🅪Sg/VB+ .
>
#
> Section . 3 .
# NSg/VB+ . # .
> Section . 3 .
# HeadingStart NSg/VB+ . # .
>
#
> He shall from time to time give to the Congress Information of
@@ -1076,8 +1076,8 @@
# N🅪Sg/VB NSg/I/J/C/Dq D NPl/V3 P D VP/J NPrPl/V3+ .
>
#
> Section . 4 .
# NSg/VB+ . # .
> Section . 4 .
# HeadingStart NSg/VB+ . # .
>
#
> The President , Vice President and all civil Officers of the
@@ -1088,12 +1088,12 @@
# P . NSg . Nᴹ . NPr/C NSg/VB/J NSg/VB/J/R NPl/V3+ VB/C NPl .
>
#
> Article . III .
# NSg/VB+ . #r .
> Article . III .
# HeadingStart NSg/VB+ . #r .
>
#
> Section . 1 .
# NSg/VB+ . # .
> Section . 1 .
# HeadingStart NSg/VB+ . # .
>
#
> The judicial Power of the United States , shall be vested in
@@ -1110,8 +1110,8 @@
# VB/P D$+ NSg NPr/J/R/P NSg/VB+ .
>
#
> Section . 2 .
# NSg/VB+ . # .
> Section . 2 .
# HeadingStart NSg/VB+ . # .
>
#
> The judicial Power shall extend to all Cases , in Law and
@@ -1154,8 +1154,8 @@
# N🅪Sg/VB NPr/C NPl/V3+ NSg/R D NPr/VB+ NPr/VXB NSg/J/P N🅪Sg/VB+ NSg/VXB VP/J .
>
#
> Section . 3 .
# NSg/VB+ . # .
> Section . 3 .
# HeadingStart NSg/VB+ . # .
>
#
> Treason against the United States , shall consist only in
@@ -1176,8 +1176,8 @@
# VB/P D N🅪Sg/VB P D NSg/VB+ VP/J .
>
#
> Section . 4 .
# NSg/VB+ . # .
> Section . 4 .
# HeadingStart NSg/VB+ . # .
>
#
> The right of the people to be secure in their persons , houses ,
@@ -1242,12 +1242,12 @@
# VB/C NSg/J NPl+ VP/J .
>
#
> Article . IV .
# NSg/VB+ . NSg/J/#r+ .
> Article . IV .
# HeadingStart NSg/VB+ . NSg/J/#r+ .
>
#
> Section . 1 .
# NSg/VB+ . # .
> Section . 1 .
# HeadingStart NSg/VB+ . # .
>
#
> Full Faith and Credit shall be given in each State to the
@@ -1260,8 +1260,8 @@
# VB/C NPl+ VXB NSg/VXB VP/J . VB/C D+ NSg/VB+ R .
>
#
> Section . 2 .
# NSg/VB+ . # .
> Section . 2 .
# HeadingStart NSg/VB+ . # .
>
#
> All persons born or naturalized in the United States , and
@@ -1314,8 +1314,8 @@
# NSg/VB NPr/C NPr🅪Sg/VB/Comm+ NPr/VXB NSg/VXB NSg/J .
>
#
> Section . 3 .
# NSg/VB+ . # .
> Section . 3 .
# HeadingStart NSg/VB+ . # .
>
#
> New States may be admitted by the Congress into this Union ; but
@@ -1340,8 +1340,8 @@
# I/R/Dq NPl/V3 P D VP/J NPrPl/V3+ . NPr/C P I/R/Dq NSg/J N🅪Sg/VB+ .
>
#
> Section . 4 .
# NSg/VB+ . # .
> Section . 4 .
# HeadingStart NSg/VB+ . # .
>
#
> The United States shall guarantee to every State in this Union
@@ -1354,8 +1354,8 @@
# NSg+ NSg/VB NSg/VXB VP/J . C/P NSg/J Nᴹ/VB+ .
>
#
> Section . 5 .
# NSg/VB+ . # .
> Section . 5 .
# HeadingStart NSg/VB+ . # .
>
#
> The validity of the public debt of the United States ,
@@ -1374,8 +1374,8 @@
# NSg/I NPl+ . NPl VB/C NPl/V3+ VXB NSg/VXB VB NSg/J VB/C NSg/VB/J+ .
>
#
> Article . V.
# NSg/VB+ . ?
> Article . V.
# HeadingStart NSg/VB+ . ?
>
#
> The Congress , whenever two thirds of both Houses shall deem it necessary , shall
@@ -1402,8 +1402,8 @@
# C/P ISg/D$+ N🅪Sg/VB . VXB NSg/VXB VP/J P ISg/D$+ NSg/VB/J NSg+ NPr/J/R/P D NPr+ .
>
#
> Article . VI .
# NSg/VB+ . NPr/#r .
> Article . VI .
# HeadingStart NSg/VB+ . NPr/#r .
>
#
> All Debts contracted and Engagements entered into , before the Adoption of this
@@ -1446,8 +1446,8 @@
# NPr/VB/J P D NPl/VB+ P NSg/VB VB/C NSg/VB/J+ NPl/V3+ . VXB NSg/R/C NSg/VXB VP/J .
>
#
> Section . 1 .
# NSg/VB+ . # .
> Section . 1 .
# HeadingStart NSg/VB+ . # .
>
#
> The enumeration in the Constitution , of certain rights , shall
@@ -1464,8 +1464,8 @@
# D NPl/VB+ .
>
#
> Article . VII .
# NSg/VB+ . NSg/#r .
> Article . VII .
# HeadingStart NSg/VB+ . NSg/#r .
>
#
> The Ratification of the Conventions of nine States , shall be sufficient for the
@@ -1498,12 +1498,12 @@
# NSg/J NPr/J/R/P NSg/VB C IPl+ NSg/VXB R VP/J D$+ NPl/V3+ .
>
#
> Article . VIII .
# NSg/VB+ . #r .
> Article . VIII .
# HeadingStart NSg/VB+ . #r .
>
#
> Section 1 .
# NSg/VB+ # .
> Section 1 .
# HeadingStart NSg/VB+ # .
>
#
> The transportation or importation into any State , Territory , or

View File

@@ -1,13 +1,13 @@
> The Great Gatsby
# D NSg/J NPr
> The Great Gatsby
# HeadingStart D NSg/J NPr
>
#
> BY F. SCOTT FITZGERALD
# NSg/J/P ? NPr+ NPr
>
#
> CHAPTER I
# NSg/VB+ ISg/#r+
> CHAPTER I
# HeadingStart NSg/VB+ ISg/#r+
>
#
> In my younger and more vulnerable years my father gave me some advice that Ive
@@ -1296,8 +1296,8 @@
# NPr/J/R/P D VB/J Nᴹ+ .
>
#
> CHAPTER II
# NSg/VB+ #r
> CHAPTER II
# HeadingStart NSg/VB+ #r
>
#
> About half way between West Egg and New York the motor road hastily joins the
@@ -2310,8 +2310,8 @@
# R NSg/VB+ .
>
#
> CHAPTER III
# NSg/VB+ #r
> CHAPTER III
# HeadingStart NSg/VB+ #r
>
#
> There was music from my neighbors house through the summer nights . In his blue
@@ -3656,8 +3656,8 @@
# NSg/I/VB+ . ISg/#r+ NPr/VB/J NSg/I/J P D+ NSg/I/Dq+ VB/JS+ NPl/VB+ NSg/I/C/Ddem+ ISg/#r+ NSg/VXB J/R VPp/J .
>
#
> CHAPTER IV
# NSg/VB+ NSg/J/#r+
> CHAPTER IV
# HeadingStart NSg/VB+ NSg/J/#r+
>
#
> On Sunday morning while church bells rang in the villages alongshore , the world
@@ -4944,8 +4944,8 @@
# NSg/VB+ .
>
#
> CHAPTER V
# NSg/VB+ NSg/P/#r+
> CHAPTER V
# HeadingStart NSg/VB+ NSg/P/#r+
>
#
> When I came home to West Egg that night I was afraid for a moment that my house
@@ -6002,8 +6002,8 @@
# NSg/VB/J+ NPl/V3+ P D+ N🅪Sg/VB+ . Nᴹ/Vg/J NSg/IPl+ R+ J .
>
#
> CHAPTER VI
# NSg/VB+ NPr/#r
> CHAPTER VI
# HeadingStart NSg/VB+ NPr/#r
>
#
> About this time an ambitious young reporter from New York arrived one morning at
@@ -6982,8 +6982,8 @@
# VP/J VPt ? NSg/J .
>
#
> CHAPTER VII
# NSg/VB+ NSg/#r
> CHAPTER VII
# HeadingStart NSg/VB+ NSg/#r
>
#
> It was when curiosity about Gatsby was at its highest that the lights in his
@@ -9466,8 +9466,8 @@
# VB/J VB/C NPr/VB/J ISg+ Nᴹ/Vg/J R NPr/J/R/P D+ N🅪Sg/VB+ . Nᴹ/Vg/J NSg/J/P NSg/I/J+ .
>
#
> CHAPTER VIII
# NSg/VB+ #r
> CHAPTER VIII
# HeadingStart NSg/VB+ #r
>
#
> I couldnt sleep all night ; a fog - horn was groaning incessantly on the Sound ,
@@ -10448,8 +10448,8 @@
# NSg$ NSg/VB+ D/P NPr/I/J/Dq NSg/J+ NSg/VB/J/P NPr/J/R/P D NPr🅪Sg/VB+ . VB/C D NPr/VB+ VPt NSg/VB/J .
>
#
> CHAPTER IX
# NSg/VB+ #r
> CHAPTER IX
# HeadingStart NSg/VB+ #r
>
#
> After two years I remember the rest of that day , and that night and the next

View File

@@ -10,8 +10,8 @@
# . I/Ddem . NPr/J/R/P NSg/J NPl/V3 P D/P NPr/VB P NSg/VB+ NPl/V3+ .
>
#
> Examples
# NPl/V3+
> Examples
# HeadingStart NPl/V3+
>
#
> This triangle is nice .

View File

@@ -7,7 +7,7 @@ use std::sync::Arc;
use harper_core::DialectFlags;
use harper_core::language_detection::is_doc_likely_english;
use harper_core::linting::{LintGroup, Linter as _};
use harper_core::parsers::{IsolateEnglish, Markdown, Parser, PlainEnglish};
use harper_core::parsers::{IsolateEnglish, Markdown, OopsAllHeadings, Parser, PlainEnglish};
use harper_core::remove_overlaps_map;
use harper_core::{
CharString, DictWordMetadata, Document, IgnoredLints, LintContext, Lrc, remove_overlaps,
@@ -253,11 +253,20 @@ impl Linter {
ctx.default_hash()
}
pub fn organized_lints(&mut self, text: String, language: Language) -> Vec<OrganizedGroup> {
pub fn organized_lints(
&mut self,
text: String,
language: Language,
all_headings: bool,
) -> Vec<OrganizedGroup> {
let source: Vec<_> = text.chars().collect();
let source = Lrc::new(source);
let parser = language.create_parser();
let mut parser = language.create_parser();
if all_headings {
parser = Box::new(OopsAllHeadings::new(parser));
}
let document = Document::new_from_vec(source.clone(), &parser, &self.dictionary);
@@ -292,11 +301,15 @@ impl Linter {
}
/// Perform the configured linting on the provided text.
pub fn lint(&mut self, text: String, language: Language) -> Vec<Lint> {
pub fn lint(&mut self, text: String, language: Language, all_headings: bool) -> Vec<Lint> {
let source: Vec<_> = text.chars().collect();
let source = Lrc::new(source);
let parser = language.create_parser();
let mut parser = language.create_parser();
if all_headings {
parser = Box::new(OopsAllHeadings::new(parser));
}
let document = Document::new_from_vec(source.clone(), &parser, &self.dictionary);
@@ -641,7 +654,7 @@ mod tests {
linter.import_words(vec![text.clone()]);
dbg!(linter.dictionary.get_word_metadata_str(&text));
let lints = linter.lint(text, Language::Plain);
let lints = linter.lint(text, Language::Plain, false);
assert!(lints.is_empty());
}
}

View File

@@ -1,4 +1,4 @@
import type { Dialect, LintConfig } from 'harper.js';
import type { Dialect, LintConfig, LintOptions } from 'harper.js';
import type { UnpackedLintGroups } from 'lint-framework';
import { LRUCache } from 'lru-cache';
import type { ActivationKey } from './protocol';
@@ -9,16 +9,20 @@ export default class ProtocolClient {
ttl: 5_000,
});
private static cacheKey(text: string, domain: string): string {
return `${domain}:${text}`;
private static cacheKey(text: string, domain: string, options?: LintOptions): string {
return `${domain}:${text}:${options?.forceAllHeadings ?? ''}:${options?.language ?? ''}`;
}
public static async lint(text: string, domain: string): Promise<UnpackedLintGroups> {
const key = this.cacheKey(text, domain);
public static async lint(
text: string,
domain: string,
options?: LintOptions,
): Promise<UnpackedLintGroups> {
const key = this.cacheKey(text, domain, options);
let p = this.lintCache.get(key);
if (!p) {
p = chrome.runtime
.sendMessage({ kind: 'lint', text, domain })
.sendMessage({ kind: 'lint', text, domain, options })
.then((r) => r.lints as UnpackedLintGroups);
this.lintCache.set(key, p);
}

View File

@@ -162,7 +162,7 @@ async function handleLint(req: LintRequest): Promise<LintResponse> {
return { kind: 'lints', lints: {} };
}
const grouped = await linter.organizedLints(req.text);
const grouped = await linter.organizedLints(req.text, req.options);
const unpackedEntries = await Promise.all(
Object.entries(grouped).map(async ([source, lints]) => {
const unpacked = await Promise.all(lints.map((lint) => unpackLint(req.text, lint, linter)));

View File

@@ -13,22 +13,25 @@ if (isWordPress()) {
ProtocolClient.setDomainEnabled(window.location.hostname, true, false);
}
const fw = new LintFramework((text, domain) => ProtocolClient.lint(text, domain), {
ignoreLint: (hash) => ProtocolClient.ignoreHash(hash),
getActivationKey: () => ProtocolClient.getActivationKey(),
openOptions: () => ProtocolClient.openOptions(),
addToUserDictionary: (words) => ProtocolClient.addToUserDictionary(words),
reportError: (lint: UnpackedLint, ruleId: string) =>
ProtocolClient.openReportError(
padWithContext(lint.source, lint.span.start, lint.span.end, 15),
ruleId,
'',
),
setRuleEnabled: async (ruleId, enabled) => {
await ProtocolClient.setRuleEnabled(ruleId, enabled);
fw.update();
const fw = new LintFramework(
(text, domain, options) => ProtocolClient.lint(text, domain, options),
{
ignoreLint: (hash) => ProtocolClient.ignoreHash(hash),
getActivationKey: () => ProtocolClient.getActivationKey(),
openOptions: () => ProtocolClient.openOptions(),
addToUserDictionary: (words) => ProtocolClient.addToUserDictionary(words),
reportError: (lint: UnpackedLint, ruleId: string) =>
ProtocolClient.openReportError(
padWithContext(lint.source, lint.span.start, lint.span.end, 15),
ruleId,
'',
),
setRuleEnabled: async (ruleId, enabled) => {
await ProtocolClient.setRuleEnabled(ruleId, enabled);
fw.update();
},
},
});
);
function padWithContext(source: string, start: number, end: number, contextLength: number): string {
const normalizedStart = Math.max(0, Math.min(start, source.length));
@@ -40,7 +43,7 @@ function padWithContext(source: string, start: number, end: number, contextLengt
}
const keepAliveCallback = () => {
ProtocolClient.lint('', 'example.com');
ProtocolClient.lint('', 'example.com', {});
setTimeout(keepAliveCallback, 400);
};

View File

@@ -1,4 +1,4 @@
import type { Dialect, LintConfig } from 'harper.js';
import type { Dialect, LintConfig, LintOptions } from 'harper.js';
import type { UnpackedLintGroups } from 'lint-framework';
export type Request =
@@ -40,6 +40,7 @@ export type LintRequest = {
kind: 'lint';
domain: string;
text: string;
options: LintOptions;
};
export type LintResponse = {

View File

@@ -347,6 +347,34 @@ for (const [linterName, Linter] of Object.entries(linters)) {
expect(text.slice(span.start, span.end)).toBe('sdssda');
});
// Heading-only rules must fire when the caller forces the whole document to be treated as headings.
test(`${linterName} lints headings when forced to mark them as such`, async () => {
	const text = 'This sentences should be forced to title case.';
	// Use the implementation currently under iteration rather than always
	// `LocalLinter`, so every entry in `linters` is actually exercised.
	const linter = new Linter({ binary });

	const lints = await linter.lint(text, { forceAllHeadings: true });

	// The input is one forced heading, so exactly one lint covering all of it is expected.
	expect(lints.length).toBe(1);

	const lint = lints[0];
	expect(lint.lint_kind()).toBe('Capitalization');
	expect(lint.get_problem_text()).toBe(text);
});
// Same scenario as the plain `lint` case, but through `organizedLints`, which groups lints by rule name.
test(`${linterName} lints headings when forced to mark them as such with organized mode`, async () => {
	const text = 'This sentences should be forced to title case.';
	// Use the implementation currently under iteration rather than always
	// `LocalLinter`, so every entry in `linters` is actually exercised.
	const linter = new Linter({ binary });

	const lints = await linter.organizedLints(text, { forceAllHeadings: true });

	// The title-case rule must have produced its own group with a single entry.
	const titleCaseLints = lints.UseTitleCase;
	expect(titleCaseLints).not.toBeUndefined();
	expect(titleCaseLints.length).toBe(1);

	const lint = titleCaseLints[0];
	expect(lint.lint_kind()).toBe('Capitalization');
	expect(lint.get_problem_text()).toBe(text);
});
}
test('Linters have the same config format', async () => {

View File

@@ -35,7 +35,7 @@ export default class LocalLinter implements Linter {
async lint(text: string, options?: LintOptions): Promise<Lint[]> {
const inner = await this.inner;
const language = options?.language === 'plaintext' ? Language.Plain : Language.Markdown;
const lints = inner.lint(text, language);
const lints = inner.lint(text, language, options?.forceAllHeadings ?? false);
return lints;
}
@@ -43,7 +43,7 @@ export default class LocalLinter implements Linter {
async organizedLints(text: string, options?: LintOptions): Promise<Record<string, Lint[]>> {
const inner = await this.inner;
const language = options?.language === 'plaintext' ? Language.Plain : Language.Markdown;
const lintGroups = inner.organized_lints(text, language);
const lintGroups = inner.organized_lints(text, language, options?.forceAllHeadings ?? false);
const output: Record<string, Lint[]> = {};

View File

@@ -17,4 +17,7 @@ export type LintConfig = Record<string, boolean | null>;
/** Optional, per-call settings accepted by the linter's `lint` and `organizedLints` methods. */
export interface LintOptions {
/** The markup language that is being passed. Defaults to `markdown`. */
language?: 'plaintext' | 'markdown';
/**
 * Force the entirety of the document to be composed of headings, so that
 * heading-only rules are applied to all of the text.
 * An undefined value is assumed to be false.
 */
forceAllHeadings?: boolean;
}

View File

@@ -1,6 +1,7 @@
import type { LintOptions } from 'harper.js';
import type { IgnorableLintBox } from './Box';
import computeLintBoxes from './computeLintBoxes';
import { isVisible } from './domUtils';
import { isHeading, isVisible } from './domUtils';
import Highlights from './Highlights';
import PopupHandler from './PopupHandler';
import type { UnpackedLint, UnpackedLintGroups } from './unpackLint';
@@ -27,7 +28,11 @@ export default class LintFramework {
private updateEventCallback: () => void;
/** Function used to fetch lints for a given text/domain. */
private lintProvider: (text: string, domain: string) => Promise<UnpackedLintGroups>;
private lintProvider: (
text: string,
domain: string,
options?: LintOptions,
) => Promise<UnpackedLintGroups>;
/** Actions wired by host environment (extension/app). */
private actions: {
ignoreLint?: (hash: string) => Promise<void>;
@@ -39,7 +44,11 @@ export default class LintFramework {
};
constructor(
lintProvider: (text: string, domain: string) => Promise<UnpackedLintGroups>,
lintProvider: (
text: string,
domain: string,
options?: LintOptions,
) => Promise<UnpackedLintGroups>,
actions: {
ignoreLint?: (hash: string) => Promise<void>;
getActivationKey?: () => Promise<ActivationKey>;
@@ -120,7 +129,9 @@ export default class LintFramework {
return { target: null as HTMLElement | null, lints: {} };
}
const lintsBySource = await this.lintProvider(text, window.location.hostname);
const lintsBySource = await this.lintProvider(text, window.location.hostname, {
forceAllHeadings: isHeading(target),
});
return { target: target as HTMLElement, lints: lintsBySource };
}),
);

View File

@@ -112,6 +112,17 @@ export function getRangeForTextSpan(target: Element, span: Span): Range | null {
const sharedRange: Range | null = typeof document !== 'undefined' ? document.createRange() : null;
/**
 * Determine whether a node is a heading.
 *
 * A node counts as a heading when it is an element that is either a native
 * `h1`-`h6` tag or carries a `role` attribute equal to "heading"
 * (compared case-insensitively). Anything that is not an element is never a heading.
 */
export function isHeading(node: Node): boolean {
	if (node instanceof Element) {
		const isNativeHeading = /^h[1-6]$/.test(node.tagName.toLowerCase());
		// `||` short-circuits, so the attribute is only read for non-native tags.
		return isNativeHeading || node.getAttribute('role')?.toLowerCase() === 'heading';
	}

	return false;
}
/** Check if an element is visible to the user.
*
* It is coarse and meant for performance improvements, not precision.*/