feat(chunker): build new chunker with Burn (#1579)

Elijah Potter
2025-07-31 11:49:46 -06:00
committed by GitHub
parent bdc6b1b16b
commit 7f10ac6055
37 changed files with 45571 additions and 9374 deletions

Cargo.lock (generated): 2731 lines changed

File diff suppressed because it is too large

View File

@@ -18,4 +18,4 @@ opt-level = 3
# Useful for debugging and profiling.
[profile.release-debug]
inherits = "release"
debug = 2
debug = 2

View File

@@ -11,7 +11,7 @@ RUN cargo install wasm-pack
COPY . .
WORKDIR /usr/build/harper-wasm
RUN wasm-pack build --release --target web
RUN RUSTFLAGS='--cfg getrandom_backend="wasm_js"' wasm-pack build --target web
FROM node:${NODE_VERSION} AS node-build

Binary file not shown.

File diff suppressed because it is too large

View File

@@ -1,4 +1,7 @@
use harper_pos_utils::{BurnChunkerCpu, CachedChunker};
use lazy_static::lazy_static;
use std::num::NonZero;
use std::rc::Rc;
use std::sync::Arc;
pub use harper_pos_utils::{BrillChunker, BrillTagger, Chunker, FreqDict, Tagger, UPOS};
@@ -30,3 +33,21 @@ fn uncached_brill_chunker() -> BrillChunker {
pub fn brill_chunker() -> Arc<BrillChunker> {
(*BRILL_CHUNKER).clone()
}
const BURN_CHUNKER_VOCAB: &[u8; 627993] = include_bytes!("../finished_chunker/vocab.json");
const BURN_CHUNKER_BIN: &[u8; 806312] = include_bytes!("../finished_chunker/model.mpk");
thread_local! {
static BURN_CHUNKER: Rc<CachedChunker<BurnChunkerCpu>> = Rc::new(uncached_burn_chunker());
}
fn uncached_burn_chunker() -> CachedChunker<BurnChunkerCpu> {
CachedChunker::new(
BurnChunkerCpu::load_from_bytes_cpu(BURN_CHUNKER_BIN, BURN_CHUNKER_VOCAB, 6, 0.3),
NonZero::new(10000).unwrap(),
)
}
pub fn burn_chunker() -> Rc<CachedChunker<BurnChunkerCpu>> {
(BURN_CHUNKER).with(|c| c.clone())
}
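
A rough usage sketch of the new accessor (not part of the diff; it assumes the tag_sentence/chunk_sentence signatures that harper-core's Document relies on later in this commit):

use harper_brill::{Chunker, Tagger, brill_tagger, burn_chunker};

// Returns one flag per token: true if the token belongs to a noun phrase.
fn noun_phrase_flags(words: &[String]) -> Vec<bool> {
    // UPOS-tag the tokens first; the chunker consumes the tokens plus their tags.
    let tags = brill_tagger().tag_sentence(words);
    burn_chunker().chunk_sentence(words, &tags)
}

Because the Burn model lives in a thread_local behind an Rc, each call to burn_chunker() is a cheap pointer clone, and the cache inside CachedChunker (capacity 10000 above) is shared across calls on the same thread.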

View File

@@ -24,3 +24,4 @@ strum_macros = "0.27.2"
[features]
default = []
training = ["harper-pos-utils/training"]

View File

@@ -21,7 +21,9 @@ use harper_core::{
word_metadata_orthography::OrthFlags,
};
use harper_literate_haskell::LiterateHaskellParser;
use harper_pos_utils::{BrillChunker, BrillTagger};
#[cfg(feature = "training")]
use harper_pos_utils::{BrillChunker, BrillTagger, BurnChunkerCpu};
use harper_stats::Stats;
use serde::Serialize;
@@ -101,6 +103,7 @@ enum Args {
/// The document to mine words from.
file: PathBuf,
},
#[cfg(feature = "training")]
TrainBrillTagger {
#[arg(short, long, default_value = "1.0")]
candidate_selection_chance: f32,
@@ -112,6 +115,7 @@ enum Args {
#[arg(num_args = 1..)]
datasets: Vec<PathBuf>,
},
#[cfg(feature = "training")]
TrainBrillChunker {
#[arg(short, long, default_value = "1.0")]
candidate_selection_chance: f32,
@@ -123,6 +127,27 @@ enum Args {
#[arg(num_args = 1..)]
datasets: Vec<PathBuf>,
},
#[cfg(feature = "training")]
TrainBurnChunker {
#[arg(short, long)]
lr: f64,
// The number of embedding dimensions
#[arg(long)]
dim: usize,
/// The path to write the final model file to.
#[arg(short, long)]
output: PathBuf,
/// The number of epochs to train.
#[arg(short, long)]
epochs: usize,
/// The dropout probability
#[arg(long)]
dropout: f32,
#[arg(short, long)]
test_file: PathBuf,
#[arg(num_args = 1..)]
datasets: Vec<PathBuf>,
},
/// Print harper-core version.
CoreVersion,
/// Rename a flag in the dictionary and affixes.
@@ -476,6 +501,7 @@ fn main() -> anyhow::Result<()> {
println!("harper-core v{}", harper_core::core_version());
Ok(())
}
#[cfg(feature = "training")]
Args::TrainBrillTagger {
datasets: dataset,
epochs,
@@ -487,6 +513,7 @@ fn main() -> anyhow::Result<()> {
Ok(())
}
#[cfg(feature = "training")]
Args::TrainBrillChunker {
datasets,
epochs,
@@ -497,6 +524,22 @@ fn main() -> anyhow::Result<()> {
fs::write(output, serde_json::to_string_pretty(&chunker)?)?;
Ok(())
}
#[cfg(feature = "training")]
Args::TrainBurnChunker {
datasets,
test_file,
epochs,
dropout,
output,
lr,
dim: embed_dim,
} => {
let chunker =
BurnChunkerCpu::train_cpu(&datasets, &test_file, embed_dim, dropout, epochs, lr);
chunker.save_to(output);
Ok(())
}
Args::RenameFlag { old, new, dir } => {
use serde_json::Value;
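
One detail worth noting: the #[cfg(feature = "training")] attribute has to sit on both the clap variant and its match arm, otherwise a default (non-training) build fails to compile. A standalone sketch of that pattern, illustrative only and not code from this commit:

enum Cmd {
    Version,
    #[cfg(feature = "training")]
    Train { epochs: usize },
}

fn run(cmd: Cmd) {
    match cmd {
        Cmd::Version => println!("version"),
        // Gate the arm too: without the feature, the Train variant does not exist.
        #[cfg(feature = "training")]
        Cmd::Train { epochs } => println!("training for {epochs} epochs"),
    }
}

fn main() {
    run(Cmd::Version);
}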

View File

@@ -2,7 +2,7 @@ use std::cmp::Ordering;
use std::collections::VecDeque;
use std::fmt::Display;
use harper_brill::{Chunker, Tagger, brill_chunker, brill_tagger};
use harper_brill::{Chunker, Tagger, brill_tagger, burn_chunker};
use paste::paste;
use crate::expr::{Expr, ExprExt, FirstMatchOf, Repeating, SequenceExpr};
@@ -140,33 +140,37 @@ impl Document {
self.condense_filename_extensions();
self.match_quotes();
let token_strings: Vec<_> = self
.tokens
.iter()
.filter(|t| !t.kind.is_whitespace())
.map(|t| self.get_span_content_str(&t.span))
.collect();
let chunker = burn_chunker();
let tagger = brill_tagger();
let token_tags = brill_tagger().tag_sentence(&token_strings);
let np_flags = brill_chunker().chunk_sentence(&token_strings, &token_tags);
for sent in self.tokens.iter_sentences_mut() {
let token_strings: Vec<_> = sent
.iter()
.filter(|t| !t.kind.is_whitespace())
.map(|t| t.span.get_content_string(&self.source))
.collect();
let mut i = 0;
let token_tags = tagger.tag_sentence(&token_strings);
let np_flags = chunker.chunk_sentence(&token_strings, &token_tags);
// Annotate word metadata
for token in self.tokens.iter_mut() {
if let TokenKind::Word(meta) = &mut token.kind {
let word_source = token.span.get_content(&self.source);
let mut found_meta = dictionary.get_word_metadata(word_source).cloned();
let mut i = 0;
if let Some(inner) = &mut found_meta {
inner.pos_tag = token_tags[i].or_else(|| inner.infer_pos_tag());
inner.np_member = Some(np_flags[i]);
// Annotate word metadata
for token in sent.iter_mut() {
if let TokenKind::Word(meta) = &mut token.kind {
let word_source = token.span.get_content(&self.source);
let mut found_meta = dictionary.get_word_metadata(word_source).cloned();
if let Some(inner) = &mut found_meta {
inner.pos_tag = token_tags[i].or_else(|| inner.infer_pos_tag());
inner.np_member = Some(np_flags[i]);
}
*meta = found_meta;
i += 1;
} else if !token.kind.is_whitespace() {
i += 1;
}
*meta = found_meta;
i += 1;
} else if !token.kind.is_whitespace() {
i += 1;
}
}
}
@@ -745,6 +749,10 @@ impl TokenStringExt for Document {
fn iter_sentences(&self) -> impl Iterator<Item = &'_ [Token]> + '_ {
self.tokens.iter_sentences()
}
fn iter_sentences_mut(&mut self) -> impl Iterator<Item = &'_ mut [Token]> + '_ {
self.tokens.iter_sentences_mut()
}
}
impl Display for Document {

View File

@@ -99,6 +99,10 @@ pub trait TokenStringExt {
/// Get an iterator over token slices that represent the individual
/// sentences in a document.
fn iter_sentences(&self) -> impl Iterator<Item = &'_ [Token]> + '_;
/// Get an iterator over mutable token slices that represent the individual
/// sentences in a document.
fn iter_sentences_mut(&mut self) -> impl Iterator<Item = &'_ mut [Token]> + '_;
}
impl TokenStringExt for [Token] {
@@ -239,4 +243,32 @@ impl TokenStringExt for [Token] {
first_sentence.into_iter().chain(rest).chain(last_sentence)
}
fn iter_sentences_mut(&mut self) -> impl Iterator<Item = &mut [Token]> + '_ {
struct SentIter<'a> {
rem: &'a mut [Token],
}
impl<'a> Iterator for SentIter<'a> {
type Item = &'a mut [Token];
fn next(&mut self) -> Option<Self::Item> {
if self.rem.is_empty() {
return None;
}
let split = self
.rem
.iter()
.position(|t| t.kind.is_sentence_terminator())
.map(|i| i + 1)
.unwrap_or(self.rem.len());
let tmp = core::mem::take(&mut self.rem);
let (sent, rest) = tmp.split_at_mut(split);
self.rem = rest;
Some(sent)
}
}
SentIter { rem: self }
}
}
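
The iterator above is the standard trick for handing out disjoint &mut slices from a single mutable borrow: mem::take swaps the remainder out so split_at_mut can consume it by value. The same pattern in a self-contained form (illustrative, not part of the diff):

fn split_after_mut<'a, T>(mut rem: &'a mut [T], is_end: impl Fn(&T) -> bool) -> Vec<&'a mut [T]> {
    let mut out = Vec::new();
    while !rem.is_empty() {
        // Split just after the first element matching the sentinel, or take everything.
        let split = rem
            .iter()
            .position(&is_end)
            .map(|i| i + 1)
            .unwrap_or(rem.len());
        let (chunk, rest) = core::mem::take(&mut rem).split_at_mut(split);
        out.push(chunk);
        rem = rest;
    }
    out
}

fn main() {
    let mut data = [1, 2, 0, 3, 0, 4];
    for chunk in split_after_mut(&mut data, |&x| x == 0) {
        chunk.reverse(); // each chunk is an independent &mut slice
    }
    assert_eq!(data, [0, 2, 1, 0, 3, 4]);
}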

View File

@@ -4,7 +4,7 @@ use itertools::Itertools;
use paste::paste;
use serde::{Deserialize, Serialize};
use smallvec::SmallVec;
use strum::{EnumCount, VariantArray};
use strum::{EnumCount as _, VariantArray as _};
use strum_macros::{Display, EnumCount, EnumString, VariantArray};
use std::convert::TryFrom;

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@@ -6,68 +6,68 @@
# Unlintable Unlintable
> -->
# Unlintable Unlintable
> Part - of - speech tagging
# Unlintable NSg/V/J . P . N🅪Sg/V+ NSg/V
> Part - of - speech tagging
# Unlintable NSg/V/J+ . P . N🅪Sg/V+ NSg/V
>
#
> In corpus linguistics , part - of - speech tagging ( POS tagging or PoS tagging or
# NPr/J/P NSg+ NᴹSg . NSg/V/J . P . N🅪Sg/V NSg/V . NSg+ NSg/V NPr/C NSg+ NSg/V NPr/C
> POST ) , also called grammatical tagging is the process of marking up a word in a
# NPr🅪/V/P+ . . W? V/J J NSg/V VL D NSg/V P NSg/V NSg/V/J/P D/P NSg/V NPr/J/P D/P
> text ( corpus ) as corresponding to a particular part of speech , based on both its
# N🅪Sg/V . NSg+ . NSg/R NSg/V/J P D/P NSg/J NSg/V/J P N🅪Sg/V+ . V/J J/P I/C/Dq ISg/D$+
> In corpus linguistics , part - of - speech tagging ( POS tagging or PoS tagging or
# NPr/J/P NSg+ NᴹSg+ . NSg/V/J+ . P . N🅪Sg/V+ NSg/V . NSg+ NSg/V NPr/C NSg+ NSg/V NPr/C
> POST ) , also called grammatical tagging is the process of marking up a word in a
# NPr🅪/V/P+ . . W? V/J J NSg/V VL D NSg/V P NSg/V NSg/V/J/P D/P NSg/V+ NPr/J/P D/P
> text ( corpus ) as corresponding to a particular part of speech , based on both its
# N🅪Sg/V+ . NSg+ . NSg/R NSg/V/J P D/P NSg/J NSg/V/J P N🅪Sg/V+ . V/J J/P I/C/Dq ISg/D$+
> definition and its context . A simplified form of this is commonly taught to
# NSg V/C ISg/D$+ N🅪Sg/V+ . D/P V/J NSg/V P I/Ddem+ VL R V P
> school - age children , in the identification of words as nouns , verbs , adjectives ,
# NSg/V . N🅪Sg/V NPl . NPr/J/P D NSg P NPl/V+ NSg/R NPl/V . NPl/V+ . NPl/V .
> school - age children , in the identification of words as nouns , verbs , adjectives ,
# NSg/V . N🅪Sg/V+ NPl+ . NPr/J/P D NSg P NPl/V+ NSg/R NPl/V . NPl/V+ . NPl/V .
> adverbs , etc.
# NPl/V . W?
# NPl/V . +
>
#
> Once performed by hand , POS tagging is now done in the context of computational
# NSg/C V/J NSg/J/P NSg/V+ . NSg+ NSg/V VL NPr/V/J/C NSg/V/J NPr/J/P D N🅪Sg/V P J+
# NSg/C V/J NSg/J/P NSg/V+ . NSg+ NSg/V VL NPr/V/J/C NSg/V/J NPr/J/P D N🅪Sg/V P J
> linguistics , using algorithms which associate discrete terms , as well as hidden
# NᴹSg+ . V NPl+ I/C+ NSg/V/J+ J NPl/V+ . NSg/R NSg/V/J NSg/R V/J
> parts of speech , by a set of descriptive tags . POS - tagging algorithms fall into
# NPl/V P N🅪Sg/V+ . NSg/J/P D/P NPr/V/J P NSg/J+ NPl/V+ . NSg+ . NSg/V NPl NSg/V P
> two distinctive groups : rule - based and stochastic . E. Brill's tagger , one of the
# NSg NSg/J NPl/V+ . NSg/V+ . V/J+ V/C+ J+ . ? ? NSg . NSg/I/V/J P D
> parts of speech , by a set of descriptive tags . POS - tagging algorithms fall into
# NPl/V P N🅪Sg/V+ . NSg/J/P D/P NPr/V/J P NSg/J NPl/V+ . NSg+ . NSg/V NPl+ NSg/V+ P
> two distinctive groups : rule - based and stochastic . E. Brill's tagger , one of the
# NSg NSg/J NPl/V+ . NSg/V+ . V/J V/C J . ? ? NSg . NSg/I/V/J P D
> first and most widely used English POS - taggers , employs rule - based algorithms .
# NSg/V/J V/C NSg/I/J/Dq R V/J NPr🅪/V/J+ NSg+ . NPl . NPl/V NSg/V+ . V/J NPl+ .
>
#
> Principle
# N🅪Sg/V
# N🅪Sg/V+
>
#
> Part - of - speech tagging is harder than just having a list of words and their
# NSg/V/J . P . N🅪Sg/V NSg/V VL JC C/P V/J V D/P NSg/V P NPl/V V/C D$+
> parts of speech , because some words can represent more than one part of speech
# NPl/V P N🅪Sg/V+ . C/P I/J/R/Dq+ NPl/V+ NPr/VX V NPr/I/V/J/Dq C/P NSg/I/V/J NSg/V/J P N🅪Sg/V+
> at different times , and because some parts of speech are complex . This is not
# NSg/P NSg/J+ NPl/V+ . V/C C/P I/J/R/Dq NPl/V P N🅪Sg/V+ V+ NSg/V/J+ . I/Ddem+ VL NSg/C
> rare — in natural languages ( as opposed to many artificial languages ) , a large
# NSg/V/J . NPr/J/P NSg/J NPl/V+ . NSg/R V/J P NSg/I/J/Dq J NPl/V+ . . D/P NSg/J
> Part - of - speech tagging is harder than just having a list of words and their
# NSg/V/J+ . P . N🅪Sg/V+ NSg/V VL JC C/P V/J V D/P NSg/V P NPl/V+ V/C D$+
> parts of speech , because some words can represent more than one part of speech
# NPl/V P N🅪Sg/V+ . C/P I/J/R/Dq NPl/V+ NPr/VX V NPr/I/V/J/Dq C/P NSg/I/V/J NSg/V/J P N🅪Sg/V+
> at different times , and because some parts of speech are complex . This is not
# NSg/P NSg/J NPl/V+ . V/C C/P I/J/R/Dq NPl/V P N🅪Sg/V+ V NSg/V/J . I/Ddem+ VL NSg/C
> rare — in natural languages ( as opposed to many artificial languages ) , a large
# NSg/V/J . NPr/J/P NSg/J+ NPl/V+ . NSg/R V/J P NSg/I/J/Dq+ J+ NPl/V+ . . D/P NSg/J
> percentage of word - forms are ambiguous . For example , even " dogs " , which is
# NSg P NSg/V+ . NPl/V+ V+ J+ . C/P NSg/V+ . NSg/V/J . NPl/V+ . . I/C+ VL
> usually thought of as just a plural noun , can also be a verb :
# R NSg/V P NSg/R V/J D/P+ NSg/J+ NSg/V+ . NPr/VX W? NSg/VX D/P NSg/V+ .
# NSg P NSg/V+ . NPl/V+ V J . C/P NSg/V+ . NSg/V/J . NPl/V+ . . I/C+ VL
> usually thought of as just a plural noun , can also be a verb :
# R NSg/V P NSg/R V/J D/P+ NSg/J+ NSg/V+ . NPr/VX W? NSg/VX D/P+ NSg/V+ .
>
#
> The sailor dogs the hatch .
# D+ NSg NPl/V D NSg/V+ .
> The sailor dogs the hatch .
# D+ NSg+ NPl/V+ D+ NSg/V+ .
>
#
> Correct grammatical tagging will reflect that " dogs " is here used as a verb , not
# NSg/V/J+ J NSg/V NPr/VX V NSg/I/C/Ddem+ . NPl/V+ . VL NSg/J/R V/J NSg/R D/P+ NSg/V+ . NSg/C
> as the more common plural noun . Grammatical context is one way to determine
# NSg/R D NPr/I/V/J/Dq NSg/V/J NSg/J NSg/V+ . J N🅪Sg/V+ VL NSg/I/V/J NSg/J+ P V
> Correct grammatical tagging will reflect that " dogs " is here used as a verb , not
# NSg/V/J J NSg/V NPr/VX V NSg/I/C/Ddem+ . NPl/V+ . VL NSg/J/R V/J NSg/R D/P NSg/V+ . NSg/C
> as the more common plural noun . Grammatical context is one way to determine
# NSg/R D NPr/I/V/J/Dq NSg/V/J NSg/J NSg/V+ . J+ N🅪Sg/V+ VL NSg/I/V/J NSg/J P V
> this ; semantic analysis can also be used to infer that " sailor " and " hatch "
# I/Ddem+ . NSg/J N🅪Sg+ NPr/VX W? NSg/VX V/J P V NSg/I/C/Ddem+ . NSg+ . V/C . NSg/V .
> implicate " dogs " as 1 ) in the nautical context and 2 ) an action applied to the
# NSg/V . NPl/V . NSg/R # . NPr/J/P D+ J+ N🅪Sg/V+ V/C # . D/P NSg/V/J+ V/J P D
> object " hatch " ( in this context , " dogs " is a nautical term meaning " fastens ( a
# NSg/V+ . NSg/V . . NPr/J/P I/Ddem+ N🅪Sg/V+ . . NPl/V+ . VL D/P J NSg/V/J+ N🅪Sg/V/J+ . V . D/P
# I/Ddem+ . NSg/J+ N🅪Sg+ NPr/VX W? NSg/VX V/J P V NSg/I/C/Ddem+ . NSg+ . V/C . NSg/V .
> implicate " dogs " as 1 ) in the nautical context and 2 ) an action applied to the
# NSg/V . NPl/V+ . NSg/R # . NPr/J/P D J N🅪Sg/V+ V/C # . D/P NSg/V/J+ V/J P D
> object " hatch " ( in this context , " dogs " is a nautical term meaning " fastens ( a
# NSg/V+ . NSg/V . . NPr/J/P I/Ddem N🅪Sg/V+ . . NPl/V+ . VL D/P J NSg/V/J+ N🅪Sg/V/J+ . V . D/P
> watertight door ) securely " ) .
# J NSg/V+ . R . . .
>
@@ -76,358 +76,358 @@
# NSg/V+ NPl/V
>
#
> Schools commonly teach that there are 9 parts of speech in English : noun , verb ,
# NPl/V+ R NSg/V NSg/I/C/Ddem + V # NPl/V P N🅪Sg/V+ NPr/J/P NPr🅪/V/J . NSg/V+ . NSg/V+ .
> Schools commonly teach that there are 9 parts of speech in English : noun , verb ,
# NPl/V+ R NSg/V NSg/I/C/Ddem + V # NPl/V P N🅪Sg/V NPr/J/P NPr🅪/V/J . NSg/V+ . NSg/V+ .
> article , adjective , preposition , pronoun , adverb , conjunction , and interjection .
# NSg/V+ . NSg/V/J+ . NSg/V . NSg/V+ . NSg/V+ . NSg/V+ . V/C NSg+ .
> However , there are clearly many more categories and sub - categories . For nouns ,
# C . + V R NSg/I/J/Dq NPr/I/V/J/Dq NPl+ V/C NSg/V/P . NPl . C/P NPl/V .
> the plural , possessive , and singular forms can be distinguished . In many
# D NSg/J . NSg/J . V/C NSg/J NPl/V+ NPr/VX+ NSg/VX+ V/J+ . NPr/J/P NSg/I/J/Dq+
> languages words are also marked for their " case " ( role as subject , object ,
# NPl/V+ NPl/V+ V W? V/J C/P D$+ . NPr/V+ . . NSg NSg/R NSg/V/J . NSg/V+ .
> However , there are clearly many more categories and sub - categories . For nouns ,
# C . + V R NSg/I/J/Dq+ NPr/I/V/J/Dq+ NPl+ V/C NSg/V/P . NPl+ . C/P NPl/V .
> the plural , possessive , and singular forms can be distinguished . In many
# D NSg/J . NSg/J . V/C NSg/J NPl/V+ NPr/VX NSg/VX V/J . NPr/J/P NSg/I/J/Dq+
> languages words are also marked for their " case " ( role as subject , object ,
# NPl/V+ NPl/V+ V W? V/J C/P D$+ . NPr/V+ . . NSg NSg/R NSg/V/J+ . NSg/V+ .
> etc. ) , grammatical gender , and so on ; while verbs are marked for tense , aspect ,
# + . . J+ NSg/V/J+ . V/C NSg/I/J/C J/P . NSg/V/C/P NPl/V+ V V/J C/P NSg/V/J . NSg/V+ .
> and other things . In some tagging systems , different inflections of the same
# V/C NSg/V/J+ NPl/V+ . NPr/J/P I/J/R/Dq+ NSg/V NPl+ . NSg/J NPl P D+ I/J+
> and other things . In some tagging systems , different inflections of the same
# V/C NSg/V/J+ NPl/V+ . NPr/J/P I/J/R/Dq NSg/V NPl+ . NSg/J NPl P D I/J
> root word will get different parts of speech , resulting in a large number of
# NPr/V+ NSg/V+ NPr/VX NSg/V NSg/J NPl/V P N🅪Sg/V+ . V NPr/J/P D/P NSg/J NSg/V/JC P+
> tags . For example , NN for singular common nouns , NNS for plural common nouns , NP
# NPl/V+ . C/P NSg/V+ . ? C/P NSg/J NSg/V/J+ NPl/V . ? C/P NSg/J NSg/V/J+ NPl/V . NPr
> for singular proper nouns ( see the POS tags used in the Brown Corpus ) . Other
# C/P NSg/J NSg/J NPl/V . NSg/V D+ NSg+ NPl/V+ V/J NPr/J/P D+ NPr/V/J+ NSg+ . . NSg/V/J
# NPr/V+ NSg/V+ NPr/VX NSg/V NSg/J NPl/V P N🅪Sg/V+ . V NPr/J/P D/P NSg/J NSg/V/JC P
> tags . For example , NN for singular common nouns , NNS for plural common nouns , NP
# NPl/V+ . C/P NSg/V+ . ? C/P NSg/J NSg/V/J NPl/V . ? C/P NSg/J NSg/V/J NPl/V . NPr
> for singular proper nouns ( see the POS tags used in the Brown Corpus ) . Other
# C/P NSg/J NSg/J NPl/V . NSg/V D NSg+ NPl/V+ V/J NPr/J/P D NPr/V/J NSg+ . . NSg/V/J
> tagging systems use a smaller number of tags and ignore fine differences or
# NSg/V NPl+ NSg/V D/P NSg/JC NSg/V/JC P NPl/V+ V/C V NSg/V/J NSg/V NPr/C
> model them as features somewhat independent from part - of - speech .
# NSg/V/J+ NSg/IPl+ NSg/R+ NPl/V+ NSg/I NSg/J P NSg/V/J . P . N🅪Sg/V+ .
> model them as features somewhat independent from part - of - speech .
# NSg/V/J+ NSg/IPl+ NSg/R NPl/V+ NSg/I NSg/J P NSg/V/J+ . P . N🅪Sg/V+ .
>
#
> In part - of - speech tagging by computer , it is typical to distinguish from 50 to
# NPr/J/P NSg/V/J . P . N🅪Sg/V NSg/V NSg/J/P NSg/V+ . NPr/ISg+ VL NSg/J P V P # P
> In part - of - speech tagging by computer , it is typical to distinguish from 50 to
# NPr/J/P NSg/V/J+ . P . N🅪Sg/V+ NSg/V NSg/J/P NSg/V+ . NPr/ISg+ VL NSg/J P V P # P
> 150 separate parts of speech for English . Work on stochastic methods for tagging
# # NSg/V/J NPl/V P N🅪Sg/V C/P NPr🅪/V/J+ . NSg/V J/P J NPl/V C/P NSg/V
# # NSg/V/J NPl/V P N🅪Sg/V C/P NPr🅪/V/J+ . NSg/V J/P J NPl/V+ C/P NSg/V
> Koine Greek ( DeRose 1990 ) has used over 1 , 000 parts of speech and found that
# ? NPr/V/J . ? # . V V/J NSg/V/J/P # . # NPl/V P N🅪Sg/V+ V/C NSg/V NSg/I/C/Ddem
> about as many words were ambiguous in that language as in English . A
# J/P NSg/R NSg/I/J/Dq+ NPl/V+ NSg/V J NPr/J/P NSg/I/C/Ddem+ N🅪Sg/V+ NSg/R NPr/J/P NPr🅪/V/J+ . D/P
> about as many words were ambiguous in that language as in English . A
# J/P NSg/R NSg/I/J/Dq NPl/V+ NSg/V J NPr/J/P NSg/I/C/Ddem N🅪Sg/V+ NSg/R NPr/J/P NPr🅪/V/J+ . D/P
> morphosyntactic descriptor in the case of morphologically rich languages is
# ? NSg NPr/J/P D NPr/V P ? NPr/V/J NPl/V+ VL
> commonly expressed using very short mnemonics , such as Ncmsan for Category = Noun ,
# R V/J V J/R NPr/V/J/P+ NPl . NSg/I NSg/R ? C/P NSg . NSg/V+ .
> Type = common , Gender = masculine , Number = singular , Case = accusative , Animate
# NSg/V . NSg/V/J . NSg/V/J . NSg/J . NSg/V/JC . NSg/J . NPr/V . NSg/J . V/J
> commonly expressed using very short mnemonics , such as Ncmsan for Category = Noun ,
# R V/J V J/R NPr/V/J/P NPl . NSg/I NSg/R ? C/P NSg+ . NSg/V+ .
> Type = common , Gender = masculine , Number = singular , Case = accusative , Animate
# NSg/V+ . NSg/V/J . NSg/V/J+ . NSg/J . NSg/V/JC+ . NSg/J . NPr/V+ . NSg/J . V/J
> = no .
# . NPr/P .
>
#
> The most popular " tag set " for POS tagging for American English is probably the
# D NSg/I/J/Dq NSg/J . NSg/V+ NPr/V/J . C/P NSg+ NSg/V C/P NPr/J NPr🅪/V/J+ VL R D+
> The most popular " tag set " for POS tagging for American English is probably the
# D NSg/I/J/Dq NSg/J . NSg/V NPr/V/J . C/P NSg+ NSg/V C/P NPr/J NPr🅪/V/J+ VL R D
> Penn tag set , developed in the Penn Treebank project . It is largely similar to
# NPr+ NSg/V+ NPr/V/J . V/J NPr/J/P D+ NPr+ ? NSg/V+ . NPr/ISg+ VL R NSg/J P
> the earlier Brown Corpus and LOB Corpus tag sets , though much smaller . In
# D JC NPr/V/J NSg V/C NSg/V NSg+ NSg/V+ NPl/V . V/C NSg/I/J/Dq+ NSg/JC+ . NPr/J/P
# NPr+ NSg/V+ NPr/V/J . V/J NPr/J/P D NPr+ ? NSg/V+ . NPr/ISg+ VL R NSg/J P
> the earlier Brown Corpus and LOB Corpus tag sets , though much smaller . In
# D JC NPr/V/J NSg V/C NSg/V NSg+ NSg/V+ NPl/V . V/C NSg/I/J/Dq NSg/JC . NPr/J/P
> Europe , tag sets from the Eagles Guidelines see wide use and include versions
# NPr+ . NSg/V+ NPl/V P D+ NPl/V+ NPl+ NSg/V NSg/J NSg/V+ V/C NSg/V NPl/V
> for multiple languages .
# C/P NSg/J/Dq+ NPl/V+ .
# NPr+ . NSg/V+ NPl/V P D NPl/V NPl+ NSg/V NSg/J NSg/V+ V/C NSg/V NPl/V+
> for multiple languages .
# C/P NSg/J/Dq NPl/V+ .
>
#
> POS tagging work has been done in a variety of languages , and the set of POS
# NSg+ NSg/V NSg/V+ V NSg/V NSg/V/J NPr/J/P D/P NSg P NPl/V+ . V/C D NPr/V/J P NSg+
> tags used varies greatly with language . Tags usually are designed to include
# NPl/V+ V/J NPl/V R P N🅪Sg/V+ . NPl/V+ R V V/J P NSg/V
> overt morphological distinctions , although this leads to inconsistencies such as
# NSg/J J+ NPl+ . C I/Ddem+ NPl/V P NPl NSg/I NSg/R
> overt morphological distinctions , although this leads to inconsistencies such as
# NSg/J+ J+ NPl+ . C I/Ddem NPl/V P NPl NSg/I NSg/R
> case - marking for pronouns but not nouns in English , and much larger
# NPr/V+ . NSg/V C/P NPl/V NSg/C/P NSg/C NPl/V NPr/J/P NPr🅪/V/J+ . V/C NSg/I/J/Dq JC
> cross - language differences . The tag sets for heavily inflected languages such as
# NPr/V/J/P+ . N🅪Sg/V+ NSg/V . D+ NSg/V+ NPl/V C/P R V/J NPl/V+ NSg/I NSg/R
# NPr/V/J/P+ . N🅪Sg/V+ NSg/V+ . D+ NSg/V+ NPl/V C/P R V/J NPl/V+ NSg/I NSg/R
> Greek and Latin can be very large ; tagging words in agglutinative languages such
# NPr/V/J V/C NPr/J NPr/VX NSg/VX J/R NSg/J . NSg/V NPl/V+ NPr/J/P ? NPl/V+ NSg/I
> as Inuit languages may be virtually impossible . At the other extreme , Petrov et
# NSg/R NPr/J NPl/V+ NPr/VX NSg/VX R+ NSg/J+ . NSg/P D+ NSg/V/J+ NSg/J . ? ?
> as Inuit languages may be virtually impossible . At the other extreme , Petrov et
# NSg/R NPr/J NPl/V+ NPr/VX NSg/VX R NSg/J . NSg/P D NSg/V/J NSg/J . ? ?
> al. have proposed a " universal " tag set , with 12 categories ( for example , no
# ? NSg/VX V/J D/P . NSg/J . NSg/V+ NPr/V/J . P # NPl . C/P NSg/V+ . NPr/P
> subtypes of nouns , verbs , punctuation , and so on ) . Whether a very small set of
# NPl P NPl/V . NPl/V+ . NᴹSg+ . V/C NSg/I/J/C J/P+ . . I/C D/P J/R NPr/V/J NPr/V/J P
> very broad tags or a much larger set of more precise ones is preferable , depends
# J/R NSg/J NPl/V NPr/C D/P NSg/I/J/Dq JC NPr/V/J P NPr/I/V/J/Dq V/J NPl/V+ VL W? . NPl/V
> on the purpose at hand . Automatic tagging is easier on smaller tag - sets .
# J/P D+ NSg/V NSg/P NSg/V+ . NSg/J NSg/V VL NSg/JC J/P NSg/JC NSg/V+ . NPl/V+ .
# ? NSg/VX V/J D/P . NSg/J . NSg/V+ NPr/V/J . P # NPl+ . C/P NSg/V+ . NPr/P
> subtypes of nouns , verbs , punctuation , and so on ) . Whether a very small set of
# NPl P NPl/V . NPl/V+ . NᴹSg+ . V/C NSg/I/J/C J/P . . I/C D/P J/R NPr/V/J NPr/V/J P
> very broad tags or a much larger set of more precise ones is preferable , depends
# J/R NSg/J NPl/V+ NPr/C D/P NSg/I/J/Dq JC NPr/V/J P NPr/I/V/J/Dq V/J+ NPl/V+ VL W? . NPl/V
> on the purpose at hand . Automatic tagging is easier on smaller tag - sets .
# J/P D NSg/V+ NSg/P NSg/V+ . NSg/J NSg/V VL NSg/JC J/P NSg/JC NSg/V+ . NPl/V .
>
#
> History
# N🅪Sg
# N🅪Sg+
>
#
> The Brown Corpus
# D NPr/V/J+ NSg
# D+ NPr/V/J+ NSg+
>
#
> Research on part - of - speech tagging has been closely tied to corpus linguistics .
# NᴹSg/V J/P NSg/V/J . P . N🅪Sg/V NSg/V V NSg/V R V/J P NSg NᴹSg+ .
> The first major corpus of English for computer analysis was the Brown Corpus
# D NSg/V/J NPr/V/J NSg P NPr🅪/V/J+ C/P NSg/V+ N🅪Sg+ V D NPr/V/J NSg
> Research on part - of - speech tagging has been closely tied to corpus linguistics .
# NᴹSg/V J/P NSg/V/J+ . P . N🅪Sg/V+ NSg/V V NSg/V R V/J P NSg NᴹSg+ .
> The first major corpus of English for computer analysis was the Brown Corpus
# D NSg/V/J NPr/V/J NSg P NPr🅪/V/J C/P NSg/V+ N🅪Sg+ V D NPr/V/J NSg
> developed at Brown University by Henry Kučera and W. Nelson Francis , in the
# V/J NSg/P NPr/V/J NSg NSg/J/P NPr+ ? V/C ? NPr+ NPr+ . NPr/J/P D
# V/J NSg/P NPr/V/J NSg+ NSg/J/P NPr+ ? V/C ? NPr+ NPr+ . NPr/J/P D
> mid - 1960s . It consists of about 1 , 000 , 000 words of running English prose text ,
# NSg/J/P+ . #d . NPr/ISg+ NPl/V P J/P # . # . # NPl/V P NSg/V/J/P NPr🅪/V/J+ NSg/V N🅪Sg/V+ .
> made up of 500 samples from randomly chosen publications . Each sample is 2 , 000
# V NSg/V/J/P P # NPl/V+ P R+ NᴹSg/V/J NPl+ . Dq+ NSg/V+ VL # . #
> or more words ( ending at the first sentence - end after 2 , 000 words , so that the
# NPr/C NPr/I/V/J/Dq NPl/V+ . NSg/V NSg/P D NSg/V/J+ NSg/V+ . NSg/V P # . # NPl/V+ . NSg/I/J/C NSg/I/C/Ddem D+
# V NSg/V/J/P P # NPl/V+ P R NᴹSg/V/J NPl+ . Dq+ NSg/V+ VL # . #
> or more words ( ending at the first sentence - end after 2 , 000 words , so that the
# NPr/C NPr/I/V/J/Dq NPl/V+ . NSg/V NSg/P D NSg/V/J NSg/V+ . NSg/V+ P # . # NPl/V+ . NSg/I/J/C NSg/I/C/Ddem D
> corpus contains only complete sentences ) .
# NSg+ V J/R/C NSg/V/J+ NPl/V+ . .
# NSg+ V J/R/C NSg/V/J NPl/V+ . .
>
#
> The Brown Corpus was painstakingly " tagged " with part - of - speech markers over
# D+ NPr/V/J NSg V R . V/J . P NSg/V/J . P . N🅪Sg/V NPl/V NSg/V/J/P
> many years . A first approximation was done with a program by Greene and Rubin ,
# NSg/I/J/Dq+ NPl+ . D/P+ NSg/V/J+ NSg+ V NSg/V/J P D/P NPr/V NSg/J/P NPr V/C NPr .
> The Brown Corpus was painstakingly " tagged " with part - of - speech markers over
# D+ NPr/V/J+ NSg+ V R . V/J . P NSg/V/J+ . P . N🅪Sg/V+ NPl/V NSg/V/J/P
> many years . A first approximation was done with a program by Greene and Rubin ,
# NSg/I/J/Dq+ NPl+ . D/P+ NSg/V/J+ NSg+ V NSg/V/J P D/P+ NPr/V+ NSg/J/P NPr V/C NPr .
> which consisted of a huge handmade list of what categories could co - occur at
# I/C+ V/J P D/P J NSg/J NSg/V P NSg/I+ NPl+ NSg/VX NPr/I/V+ . V NSg/P+
# I/C+ V/J P D/P J NSg/J NSg/V P NSg/I+ NPl+ NSg/VX NPr/I/V+ . V NSg/P
> all . For example , article then noun can occur , but article then verb ( arguably )
# NSg/I/J/C/Dq . C/P NSg/V+ . NSg/V+ NSg/J/C NSg/V+ NPr/VX V . NSg/C/P NSg/V+ NSg/J/C NSg/V+ . R .
> cannot . The program got about 70 % correct . Its results were repeatedly reviewed
# NSg/V . D+ NPr/V+ V J/P # . NSg/V/J+ . ISg/D$+ NPl/V+ NSg/V R V/J
> cannot . The program got about 70 % correct . Its results were repeatedly reviewed
# NSg/V . D+ NPr/V+ V J/P # . NSg/V/J . ISg/D$+ NPl/V+ NSg/V R V/J
> and corrected by hand , and later users sent in errata so that by the late 70 s
# V/C V/J NSg/J/P NSg/V+ . V/C JC NPl+ NSg/V NPr/J/P NSg NSg/I/J/C NSg/I/C/Ddem+ NSg/J/P D NSg/J # ?
> the tagging was nearly perfect ( allowing for some cases on which even human
# D NSg/V V R NSg/V/J . V C/P I/J/R/Dq NPl/V+ J/P I/C+ NSg/V/J NSg/V/J
# D NSg/V V R NSg/V/J . V C/P I/J/R/Dq NPl/V+ J/P I/C+ NSg/V/J NSg/V/J+
> speakers might not agree ) .
# + NᴹSg/VX/J NSg/C V . .
>
#
> This corpus has been used for innumerable studies of word - frequency and of
# I/Ddem+ NSg V NSg/V V/J C/P J NPl/V P NSg/V+ . NSg V/C P
> part - of - speech and inspired the development of similar " tagged " corpora in many
# NSg/V/J . P . N🅪Sg/V V/C V/J D N🅪Sg P NSg/J . V/J . NPl NPr/J/P NSg/I/J/Dq+
> other languages . Statistics derived by analyzing it formed the basis for most
# NSg/V/J+ NPl/V+ . NPl/V+ V/J NSg/J/P V NPr/ISg+ V/J D NSg C/P NSg/I/J/Dq
> later part - of - speech tagging systems , such as CLAWS and VOLSUNGA . However , by
# JC NSg/V/J . P . N🅪Sg/V NSg/V NPl . NSg/I NSg/R NPl/V+ V/C ? . C . NSg/J/P
# I/Ddem+ NSg+ V NSg/V V/J C/P J NPl/V P NSg/V+ . NSg V/C P
> part - of - speech and inspired the development of similar " tagged " corpora in many
# NSg/V/J+ . P . N🅪Sg/V+ V/C V/J D N🅪Sg P NSg/J . V/J . NPl+ NPr/J/P NSg/I/J/Dq
> other languages . Statistics derived by analyzing it formed the basis for most
# NSg/V/J NPl/V+ . NPl/V+ V/J NSg/J/P V NPr/ISg+ V/J D+ NSg+ C/P NSg/I/J/Dq
> later part - of - speech tagging systems , such as CLAWS and VOLSUNGA . However , by
# JC NSg/V/J+ . P . N🅪Sg/V+ NSg/V NPl+ . NSg/I NSg/R NPl/V+ V/C ? . C . NSg/J/P
> this time ( 2005 ) it has been superseded by larger corpora such as the 100
# I/Ddem+ N🅪Sg/V/J+ . # . NPr/ISg+ V NSg/V V/J NSg/J/P JC NPl+ NSg/I NSg/R D #
> million word British National Corpus , even though larger corpora are rarely so
# NSg NSg/V+ NPr/J NSg/J+ NSg+ . NSg/V/J V/C JC+ NPl+ V R NSg/I/J/C
# NSg NSg/V+ NPr/J NSg/J NSg+ . NSg/V/J V/C JC NPl+ V R NSg/I/J/C
> thoroughly curated .
# R+ V/J+ .
# R V/J .
>
#
> For some time , part - of - speech tagging was considered an inseparable part of
# C/P I/J/R/Dq N🅪Sg/V/J . NSg/V/J . P . N🅪Sg/V NSg/V V V/J D/P NSg/J NSg/V/J P
> For some time , part - of - speech tagging was considered an inseparable part of
# C/P I/J/R/Dq N🅪Sg/V/J+ . NSg/V/J+ . P . N🅪Sg/V+ NSg/V V V/J D/P NSg/J NSg/V/J P
> natural language processing , because there are certain cases where the correct
# NSg/J+ N🅪Sg/V+ V+ . C/P + V I/J NPl/V+ NSg/C D NSg/V/J
# NSg/J N🅪Sg/V+ V+ . C/P + V I/J NPl/V+ NSg/C D NSg/V/J
> part of speech cannot be decided without understanding the semantics or even the
# NSg/V/J P N🅪Sg/V+ NSg/V NSg/VX NSg/V/J C/P NᴹSg/V/J+ D+ NPl NPr/C NSg/V/J D
# NSg/V/J P N🅪Sg/V+ NSg/V NSg/VX NSg/V/J C/P NᴹSg/V/J+ D NPl+ NPr/C NSg/V/J D
> pragmatics of the context . This is extremely expensive , especially because
# NPl P D+ N🅪Sg/V+ . I/Ddem+ VL R J . R C/P
# NPl P D N🅪Sg/V+ . I/Ddem+ VL R J . R C/P
> analyzing the higher levels is much harder when multiple part - of - speech
# V D+ NSg/JC+ NPl/V+ VL NSg/I/J/Dq JC NSg/I/C NSg/J/Dq NSg/V/J . P . N🅪Sg/V
# V D+ NSg/JC+ NPl/V+ VL NSg/I/J/Dq JC NSg/I/C NSg/J/Dq NSg/V/J . P . N🅪Sg/V+
> possibilities must be considered for each word .
# NPl NSg/V NSg/VX V/J C/P Dq+ NSg/V+ .
# NPl+ NSg/V NSg/VX V/J C/P Dq+ NSg/V+ .
>
#
> Use of hidden Markov models
# NSg/V P V/J NPr+ NPl/V
# NSg/V P V/J NPr NPl/V+
>
#
> In the mid - 1980s , researchers in Europe began to use hidden Markov models ( HMMs )
# NPr/J/P D NSg/J/P . #d . NPl NPr/J/P NPr+ V P NSg/V V/J NPr NPl/V+ . ? .
> In the mid - 1980s , researchers in Europe began to use hidden Markov models ( HMMs )
# NPr/J/P D NSg/J/P+ . #d . NPl NPr/J/P NPr+ V P NSg/V V/J NPr NPl/V+ . ? .
> to disambiguate parts of speech , when working to tag the Lancaster - Oslo - Bergen
# P V NPl/V P N🅪Sg/V+ . NSg/I/C V P NSg/V D NPr . NPr+ . NPr
> Corpus of British English . HMMs involve counting cases ( such as from the Brown
# NSg P NPr/J+ NPr🅪/V/J+ . ? V V NPl/V . NSg/I NSg/R P D+ NPr/V/J+
# P V NPl/V P N🅪Sg/V+ . NSg/I/C V P NSg/V D NPr . NPr+ . NPr+
> Corpus of British English . HMMs involve counting cases ( such as from the Brown
# NSg P NPr/J NPr🅪/V/J+ . ? V V NPl/V+ . NSg/I NSg/R P D NPr/V/J
> Corpus ) and making a table of the probabilities of certain sequences . For
# NSg+ . V/C NSg/V D/P NSg/V P D NPl P I/J+ NPl/V+ . C/P
> example , once you've seen an article such as ' the ' , perhaps the next word is a
# NSg/V+ . NSg/C W? NSg/V D/P NSg/V+ NSg/I NSg/R . D . . NSg D+ NSg/J/P+ NSg/V+ VL D/P
> noun 40 % of the time , an adjective 40 % , and a number 20 % . Knowing this , a
# NSg/V # . P D+ N🅪Sg/V/J+ . D/P+ NSg/V/J+ # . . V/C D/P+ NSg/V/JC+ # . . NSg/V/J/P I/Ddem+ . D/P+
# NSg+ . V/C NSg/V D/P NSg/V P D NPl P I/J NPl/V+ . C/P
> example , once you've seen an article such as ' the ' , perhaps the next word is a
# NSg/V+ . NSg/C W? NSg/V D/P NSg/V+ NSg/I NSg/R . D . . NSg D NSg/J/P NSg/V+ VL D/P
> noun 40 % of the time , an adjective 40 % , and a number 20 % . Knowing this , a
# NSg/V+ # . P D N🅪Sg/V/J+ . D/P NSg/V/J+ # . . V/C D/P NSg/V/JC+ # . . NSg/V/J/P I/Ddem+ . D/P+
> program can decide that " can " in " the can " is far more likely to be a noun than
# NPr/V+ NPr/VX V NSg/I/C/Ddem+ . NPr/VX . NPr/J/P . D+ NPr/VX . VL NSg/V/J NPr/I/V/J/Dq NSg/J P NSg/VX D/P NSg/V C/P
> a verb or a modal . The same method can , of course , be used to benefit from
# D/P NSg/V NPr/C D/P+ NSg/J+ . D+ I/J+ NSg/V+ NPr/VX . P NSg/V+ . NSg/VX V/J P NSg/V P
> knowledge about the following words .
# NᴹSg+ J/P D+ NSg/V/J/P+ NPl/V .
> a verb or a modal . The same method can , of course , be used to benefit from
# D/P+ NSg/V+ NPr/C D/P NSg/J . D+ I/J+ NSg/V+ NPr/VX . P NSg/V+ . NSg/VX V/J P NSg/V P
> knowledge about the following words .
# NᴹSg+ J/P D+ NSg/V/J/P NPl/V+ .
>
#
> More advanced ( " higher - order " ) HMMs learn the probabilities not only of pairs
# NPr/I/V/J/Dq V/J . . NSg/JC . NSg/V . . ? NSg/V D+ NPl+ NSg/C J/R/C P NPl/V+
# NPr/I/V/J/Dq V/J . . NSg/JC . NSg/V . . ? NSg/V D NPl+ NSg/C J/R/C P NPl/V+
> but triples or even larger sequences . So , for example , if you've just seen a
# NSg/C/P NPl/V NPr/C NSg/V/J JC NPl/V+ . NSg/I/J/C . C/P NSg/V+ . NSg/C W? V/J NSg/V D/P
> noun followed by a verb , the next item may be very likely a preposition ,
# NSg/V V/J NSg/J/P D/P+ NSg/V+ . D+ NSg/J/P+ NSg/V+ NPr/VX NSg/VX J/R NSg/J D/P NSg/V .
> article , or noun , but much less likely another verb .
# NSg/V+ . NPr/C NSg/V+ . NSg/C/P NSg/I/J/Dq V/J/C/P NSg/J+ I/D NSg/V .
> noun followed by a verb , the next item may be very likely a preposition ,
# NSg/V+ V/J NSg/J/P D/P NSg/V+ . D NSg/J/P NSg/V+ NPr/VX NSg/VX J/R NSg/J D/P NSg/V .
> article , or noun , but much less likely another verb .
# NSg/V+ . NPr/C NSg/V+ . NSg/C/P NSg/I/J/Dq V/J/C/P NSg/J I/D NSg/V+ .
>
#
> When several ambiguous words occur together , the possibilities multiply .
# NSg/I/C J/Dq J NPl/V+ V J . D+ NPl NSg/V+ .
# NSg/I/C J/Dq+ J+ NPl/V+ V J . D+ NPl+ NSg/V .
> However , it is easy to enumerate every combination and to assign a relative
# C . NPr/ISg+ VL NSg/V/J P V Dq+ N🅪Sg+ V/C P NSg/V D/P NSg/J
> probability to each one , by multiplying together the probabilities of each
# NSg P Dq+ NSg/I/V/J+ . NSg/J/P V J D NPl P Dq+
> choice in turn . The combination with the highest probability is then chosen . The
# NSg/J+ NPr/J/P NSg/V . D N🅪Sg P D+ JS+ NSg+ VL NSg/J/C+ NᴹSg/V/J . D+
> European group developed CLAWS , a tagging program that did exactly this and
# NSg/J+ NSg/V+ V/J NPl/V+ . D/P NSg/V+ NPr/V+ NSg/I/C/Ddem+ V R I/Ddem+ V/C
# NSg+ P Dq NSg/I/V/J+ . NSg/J/P V J D NPl P Dq
> choice in turn . The combination with the highest probability is then chosen . The
# NSg/J+ NPr/J/P NSg/V . D N🅪Sg P D+ JS+ NSg+ VL NSg/J/C NᴹSg/V/J . D+
> European group developed CLAWS , a tagging program that did exactly this and
# NSg/J+ NSg/V+ V/J NPl/V+ . D/P NSg/V NPr/V+ NSg/I/C/Ddem+ V R I/Ddem V/C
> achieved accuracy in the 93 95 % range .
# V/J N🅪Sg+ NPr/J/P D # . # . NSg/V+ .
>
#
> Eugene Charniak points out in Statistical techniques for natural language
# NPr+ ? NPl/V+ NSg/V/J/R/P NPr/J/P J NPl C/P NSg/J N🅪Sg/V+
> parsing ( 1997 ) that merely assigning the most common tag to each known word and
# V . # . NSg/I/C/Ddem+ R V D NSg/I/J/Dq NSg/V/J NSg/V P Dq+ V/J NSg/V V/C
> the tag " proper noun " to all unknowns will approach 90 % accuracy because many
# D NSg/V+ . NSg/J NSg/V . P NSg/I/J/C/Dq+ NPl/V+ NPr/VX NSg/V # . N🅪Sg+ C/P NSg/I/J/Dq+
> words are unambiguous , and many others only rarely represent their less - common
# NPl/V+ V J . V/C NSg/I/J/Dq+ NPl/V+ J/R/C R V D$+ V/J/C/P . NSg/V/J
# NPr+ ? NPl/V+ NSg/V/J/R/P NPr/J/P J NPl C/P NSg/J+ N🅪Sg/V+
> parsing ( 1997 ) that merely assigning the most common tag to each known word and
# V . # . NSg/I/C/Ddem+ R V D NSg/I/J/Dq NSg/V/J NSg/V+ P Dq V/J NSg/V+ V/C
> the tag " proper noun " to all unknowns will approach 90 % accuracy because many
# D NSg/V+ . NSg/J NSg/V+ . P NSg/I/J/C/Dq NPl/V+ NPr/VX NSg/V+ # . N🅪Sg+ C/P NSg/I/J/Dq
> words are unambiguous , and many others only rarely represent their less - common
# NPl/V+ V J . V/C NSg/I/J/Dq NPl/V+ J/R/C R V D$+ V/J/C/P . NSg/V/J
> parts of speech .
# NPl/V P N🅪Sg/V+ .
>
#
> CLAWS pioneered the field of HMM - based part of speech tagging but was quite
# NPl/V+ V/J D NSg/V P V . V/J NSg/V/J P N🅪Sg/V+ NSg/V NSg/C/P V NSg
> expensive since it enumerated all possibilities . It sometimes had to resort to
# J C/P NPr/ISg+ V/J NSg/I/J/C/Dq+ NPl+ . NPr/ISg+ R V P NSg/V P
> backup methods when there were simply too many options ( the Brown Corpus
# NSg/J NPl/V+ NSg/I/C + NSg/V R W? NSg/I/J/Dq+ NPl/V . D+ NPr/V/J+ NSg+
> contains a case with 17 ambiguous words in a row , and there are words such as
# V D/P NPr/V P # J NPl/V NPr/J/P D/P+ NSg/V+ . V/C + V NPl/V+ NSg/I NSg/R
> expensive since it enumerated all possibilities . It sometimes had to resort to
# J C/P NPr/ISg+ V/J NSg/I/J/C/Dq NPl+ . NPr/ISg+ R V P NSg/V P
> backup methods when there were simply too many options ( the Brown Corpus
# NSg/J NPl/V+ NSg/I/C + NSg/V R W? NSg/I/J/Dq NPl/V . D+ NPr/V/J+ NSg+
> contains a case with 17 ambiguous words in a row , and there are words such as
# V D/P NPr/V+ P # J NPl/V NPr/J/P D/P+ NSg/V+ . V/C + V NPl/V+ NSg/I NSg/R
> " still " that can represent as many as 7 distinct parts of speech .
# . NSg/V/J . NSg/I/C/Ddem+ NPr/VX V NSg/R NSg/I/J/Dq NSg/R # V/J NPl/V P N🅪Sg/V+ .
>
#
> HMMs underlie the functioning of stochastic taggers and are used in various
# ? V D V P J NPl V/C V V/J NPr/J/P J
# ? V D V+ P J NPl V/C V V/J NPr/J/P J
> algorithms one of the most widely used being the bi - directional inference
# NPl+ NSg/I/V/J P D NSg/I/J/Dq R V/J NSg/V/C D NSg/J . NSg/J NSg+
> algorithm .
# NSg+ .
# NSg .
>
#
> Dynamic programming methods
# NSg/J+ NᴹSg/V+ NPl/V
# NSg/J+ NᴹSg/V+ NPl/V+
>
#
> In 1987 , Steven DeRose and Kenneth W. Church independently developed dynamic
# NPr/J/P # . NPr+ ? V/C NPr+ ? NPr/V+ R V/J NSg/J
> programming algorithms to solve the same problem in vastly less time . Their
# NᴹSg/V+ NPl+ P NSg/V D I/J NSg/J NPr/J/P R V/J/C/P N🅪Sg/V/J+ . D$+
# NᴹSg/V+ NPl+ P NSg/V D I/J NSg/J+ NPr/J/P R V/J/C/P N🅪Sg/V/J+ . D$+
> methods were similar to the Viterbi algorithm known for some time in other
# NPl/V+ NSg/V NSg/J P D ? NSg V/J C/P I/J/R/Dq N🅪Sg/V/J+ NPr/J/P NSg/V/J+
# NPl/V+ NSg/V NSg/J P D ? NSg V/J C/P I/J/R/Dq N🅪Sg/V/J+ NPr/J/P NSg/V/J
> fields . DeRose used a table of pairs , while Church used a table of triples and a
# NPrPl/V+ . ? V/J D/P NSg/V P NPl/V+ . NSg/V/C/P NPr/V+ V/J D/P NSg/V P NPl/V V/C D/P
> method of estimating the values for triples that were rare or nonexistent in the
# NSg/V P V D NPl/V C/P NPl/V NSg/I/C/Ddem+ NSg/V NSg/V/J NPr/C NSg/J NPr/J/P D+
> Brown Corpus ( an actual measurement of triple probabilities would require a much
# NPr/V/J+ NSg . D/P NSg/J NSg P NSg/V/J NPl+ VX NSg/V D/P NSg/I/J/Dq
# NSg/V P V D NPl/V+ C/P NPl/V NSg/I/C/Ddem+ NSg/V NSg/V/J NPr/C NSg/J NPr/J/P D
> Brown Corpus ( an actual measurement of triple probabilities would require a much
# NPr/V/J NSg+ . D/P NSg/J NSg P NSg/V/J NPl+ VX NSg/V D/P NSg/I/J/Dq
> larger corpus ) . Both methods achieved an accuracy of over 95 % . DeRose's 1990
# JC NSg+ . . I/C/Dq NPl/V+ V/J D/P N🅪Sg P NSg/V/J/P # . . ? #
# JC NSg+ . . I/C/Dq NPl/V+ V/J D/P N🅪Sg+ P NSg/V/J/P # . . ? #
> dissertation at Brown University included analyses of the specific error types ,
# NSg+ NSg/P NPr/V/J NSg+ V/J NPl/V/Au/Br P D+ NSg/J+ NSg/V+ NPl/V+ .
> probabilities , and other related data , and replicated his work for Greek , where
# NPl+ . V/C NSg/V/J+ J+ N🅪Pl+ . V/C V/J ISg/D$+ NSg/V C/P NPr/V/J . NSg/C
# NSg+ NSg/P NPr/V/J NSg+ V/J NPl/V/Au/Br P D NSg/J NSg/V+ NPl/V+ .
> probabilities , and other related data , and replicated his work for Greek , where
# NPl+ . V/C NSg/V/J J N🅪Pl+ . V/C V/J ISg/D$+ NSg/V+ C/P NPr/V/J . NSg/C
> it proved similarly effective .
# NPr/ISg+ V/J R+ NSg/J .
# NPr/ISg+ V/J R NSg/J .
>
#
> These findings were surprisingly disruptive to the field of natural language
# I/Ddem+ NSg NSg/V R J P D NSg/V P NSg/J+ N🅪Sg/V+
# I/Ddem+ NSg+ NSg/V R J P D NSg/V P NSg/J+ N🅪Sg/V+
> processing . The accuracy reported was higher than the typical accuracy of very
# V+ . D+ N🅪Sg+ V/J V NSg/JC C/P D NSg/J N🅪Sg P J/R
> sophisticated algorithms that integrated part of speech choice with many higher
# V/J NPl+ NSg/I/C/Ddem+ V/J NSg/V/J P N🅪Sg/V+ NSg/J P NSg/I/J/Dq NSg/JC
> levels of linguistic analysis : syntax , morphology , semantics , and so on . CLAWS ,
# NPl/V P J N🅪Sg+ . NSg+ . NSg+ . NPl+ . V/C NSg/I/J/C+ J/P . NPl/V .
# V/J+ NPl+ NSg/I/C/Ddem+ V/J NSg/V/J P N🅪Sg/V+ NSg/J+ P NSg/I/J/Dq NSg/JC
> levels of linguistic analysis : syntax , morphology , semantics , and so on . CLAWS ,
# NPl/V P J N🅪Sg . NSg+ . NSg+ . NPl+ . V/C NSg/I/J/C J/P . NPl/V+ .
> DeRose's and Church's methods did fail for some of the known cases where
# ? V/C NSg$ NPl/V+ V NSg/V/J C/P I/J/R/Dq P D+ V/J+ NPl/V+ NSg/C
> semantics is required , but those proved negligibly rare . This convinced many in
# NPl+ VL V/J . NSg/C/P I/Ddem+ V/J R+ NSg/V/J+ . I/Ddem+ V/J NSg/I/J/Dq NPr/J/P
> the field that part - of - speech tagging could usefully be separated from the other
# D+ NSg/V+ NSg/I/C/Ddem+ NSg/V/J . P . N🅪Sg/V NSg/V NSg/VX R NSg/VX V/J P D NSg/V/J
# ? V/C NSg$ NPl/V+ V NSg/V/J C/P I/J/R/Dq P D V/J NPl/V+ NSg/C
> semantics is required , but those proved negligibly rare . This convinced many in
# NPl+ VL V/J . NSg/C/P I/Ddem V/J R NSg/V/J . I/Ddem V/J NSg/I/J/Dq NPr/J/P
> the field that part - of - speech tagging could usefully be separated from the other
# D+ NSg/V+ NSg/I/C/Ddem+ NSg/V/J+ . P . N🅪Sg/V+ NSg/V NSg/VX R NSg/VX V/J P D NSg/V/J
> levels of processing ; this , in turn , simplified the theory and practice of
# NPl/V P V . I/Ddem+ . NPr/J/P NSg/V . V/J D+ NSg V/C NSg/V P
# NPl/V P V+ . I/Ddem+ . NPr/J/P NSg/V . V/J D NSg V/C NSg/V P
> computerized language analysis and encouraged researchers to find ways to
# V/J N🅪Sg/V+ N🅪Sg+ V/C V/J NPl+ P NSg/V NPl+ P
> separate other pieces as well . Markov Models became the standard method for the
# NSg/V/J NSg/V/J+ NPl/V+ NSg/R+ NSg/V/J . NPr NPl/V+ V D NSg/J NSg/V C/P D
> part - of - speech assignment .
# NSg/V/J . P . N🅪Sg/V+ NSg+ .
> separate other pieces as well . Markov Models became the standard method for the
# NSg/V/J NSg/V/J NPl/V+ NSg/R NSg/V/J . NPr NPl/V+ V D NSg/J NSg/V+ C/P D
> part - of - speech assignment .
# NSg/V/J+ . P . N🅪Sg/V+ NSg+ .
>
#
> Unsupervised taggers
# V/J+ NPl
# V/J NPl
>
#
> The methods already discussed involve working from a pre - existing corpus to
# D+ NPl/V W? V/J V V P D/P NSg/V/P+ . V NSg P
> The methods already discussed involve working from a pre - existing corpus to
# D+ NPl/V+ W? V/J V V P D/P+ NSg/V/P+ . V NSg+ P
> learn tag probabilities . It is , however , also possible to bootstrap using
# NSg/V NSg/V+ NPl+ . NPr/ISg+ VL . C . W? NSg/J P NSg/V V
> " unsupervised " tagging . Unsupervised tagging techniques use an untagged corpus
# . V/J . NSg/V . V/J NSg/V NPl+ NSg/V D/P J NSg
> for their training data and produce the tagset by induction . That is , they
# C/P D$+ NSg/V+ N🅪Pl+ V/C NSg/V D NSg NSg/J/P+ NSg . NSg/I/C/Ddem+ VL . IPl+
> observe patterns in word use , and derive part - of - speech categories themselves .
# NSg/V NPl/V+ NPr/J/P NSg/V+ NSg/V . V/C NSg/V NSg/V/J . P . N🅪Sg/V NPl+ IPl+ .
# . V/J . NSg/V . V/J NSg/V NPl+ NSg/V D/P J NSg+
> for their training data and produce the tagset by induction . That is , they
# C/P D$+ NSg/V+ N🅪Pl+ V/C NSg/V D NSg NSg/J/P NSg . NSg/I/C/Ddem+ VL . IPl+
> observe patterns in word use , and derive part - of - speech categories themselves .
# NSg/V NPl/V+ NPr/J/P NSg/V+ NSg/V . V/C NSg/V NSg/V/J+ . P . N🅪Sg/V+ NPl+ IPl+ .
> For example , statistics readily reveal that " the " , " a " , and " an " occur in
# C/P NSg/V+ . NPl/V+ R NSg/V NSg/I/C/Ddem+ . D . . . D/P . . V/C . D/P . V NPr/J/P
> similar contexts , while " eat " occurs in very different ones . With sufficient
# NSg/J+ NPl/V+ . NSg/V/C/P . V . V NPr/J/P J/R NSg/J+ NPl/V+ . P J+
# NSg/J+ NPl/V+ . NSg/V/C/P . V . V NPr/J/P J/R NSg/J+ NPl/V+ . P J
> iteration , similarity classes of words emerge that are remarkably similar to
# NSg . NSg NPl/V P NPl/V+ NSg/V NSg/I/C/Ddem+ V R NSg/J P
> those human linguists would expect ; and the differences themselves sometimes
# I/Ddem+ NSg/V/J NPl+ VX V . V/C D+ NSg/V+ IPl+ R
> suggest valuable new insights .
# V NSg/J+ NSg/V/J+ NPl+ .
> those human linguists would expect ; and the differences themselves sometimes
# I/Ddem NSg/V/J NPl+ VX V . V/C D NSg/V+ IPl+ R
> suggest valuable new insights .
# V NSg/J NSg/V/J NPl+ .
>
#
> These two categories can be further subdivided into rule - based , stochastic , and
# I/Ddem NSg+ NPl NPr/VX NSg/VX V/J V/J P NSg/V . V/J . J . V/C
> These two categories can be further subdivided into rule - based , stochastic , and
# I/Ddem+ NSg+ NPl+ NPr/VX NSg/VX V/J V/J P NSg/V+ . V/J . J . V/C
> neural approaches .
# J+ NPl/V+ .
# J NPl/V+ .
>
#
> Other taggers and methods
# NSg/V/J+ NPl V/C NPl/V
> Other taggers and methods
# NSg/V/J NPl V/C NPl/V+
>
#
> Some current major algorithms for part - of - speech tagging include the Viterbi
# I/J/R/Dq+ NSg/J NPr/V/J NPl C/P NSg/V/J . P . N🅪Sg/V NSg/V NSg/V D ?
> Some current major algorithms for part - of - speech tagging include the Viterbi
# I/J/R/Dq NSg/J NPr/V/J NPl C/P NSg/V/J+ . P . N🅪Sg/V+ NSg/V NSg/V D ?
> algorithm , Brill tagger , Constraint Grammar , and the Baum - Welch algorithm ( also
# NSg . NSg/J NSg . NSg+ NSg/V+ . V/C D NPr . ? NSg . W?
> known as the forward - backward algorithm ) . Hidden Markov model and visible Markov
# V/J NSg/R D NSg/V/J . NSg/J NSg+ . . V/J NPr NSg/V/J+ V/C J NPr
# V/J NSg/R D NSg/V/J . NSg/J NSg . . V/J NPr NSg/V/J+ V/C J NPr
> model taggers can both be implemented using the Viterbi algorithm . The
# NSg/V/J+ NPl NPr/VX I/C/Dq NSg/VX V/J V D+ ? NSg . D
# NSg/V/J+ NPl NPr/VX I/C/Dq NSg/VX V/J V D ? NSg . D+
> rule - based Brill tagger is unusual in that it learns a set of rule patterns , and
# NSg/V+ . V/J NSg/J NSg VL NSg/J NPr/J/P NSg/I/C/Ddem NPr/ISg+ NPl/V D/P NPr/V/J P NSg/V+ NPl/V+ . V/C
> then applies those patterns rather than optimizing a statistical quantity .
# NSg/J/C V I/Ddem+ NPl/V+ NPr/V/J C/P V D/P+ J+ NSg+ .
> then applies those patterns rather than optimizing a statistical quantity .
# NSg/J/C V I/Ddem NPl/V+ NPr/V/J C/P V D/P J NSg+ .
>
#
> Many machine learning methods have also been applied to the problem of POS
# NSg/I/J/Dq+ NSg/V V+ NPl/V+ NSg/VX W? NSg/V V/J P D NSg/J P NSg+
# NSg/I/J/Dq+ NSg/V+ V+ NPl/V+ NSg/VX W? NSg/V V/J P D NSg/J P NSg+
> tagging . Methods such as SVM , maximum entropy classifier , perceptron , and
# NSg/V+ . NPl/V+ NSg/I NSg/R ? . NSg/J NSg NSg . NSg . V/C
> nearest - neighbor have all been tried , and most can achieve accuracy above
# JS . NSg/V/J/Am NSg/VX NSg/I/J/C/Dq NSg/V V/J . V/C NSg/I/J/Dq NPr/VX V N🅪Sg+ NSg/J/P
# NSg/V . NPl/V+ NSg/I NSg/R ? . NSg/J NSg NSg . NSg . V/C
> nearest - neighbor have all been tried , and most can achieve accuracy above
# JS . NSg/V/J/Am+ NSg/VX NSg/I/J/C/Dq NSg/V V/J . V/C NSg/I/J/Dq NPr/VX V N🅪Sg+ NSg/J/P
> 95 % . [ citation needed ]
# # . . . NSg+ V/J+ .
# # . . . NSg+ V/J .
>
#
> A direct comparison of several methods is reported ( with references ) at the ACL
# D/P V/J NSg P J/Dq+ NPl/V+ VL V/J . P NPl/V+ . NSg/P D+ NSg+
# D/P V/J NSg P J/Dq+ NPl/V+ VL V/J . P NPl/V+ . NSg/P D NSg
> Wiki . This comparison uses the Penn tag set on some of the Penn Treebank data ,
# NSg/V+ . I/Ddem+ NSg+ NPl/V D+ NPr+ NSg/V+ NPr/V/J J/P I/J/R/Dq P D+ NPr+ ? N🅪Pl+ .
# NSg/V+ . I/Ddem+ NSg+ NPl/V D+ NPr+ NSg/V+ NPr/V/J J/P I/J/R/Dq P D NPr+ ? N🅪Pl+ .
> so the results are directly comparable . However , many significant taggers are
# NSg/I/J/C D+ NPl/V+ V R/C NSg/J+ . C . NSg/I/J/Dq NSg/J NPl V
# NSg/I/J/C D NPl/V+ V R/C NSg/J . C . NSg/I/J/Dq NSg/J NPl V
> not included ( perhaps because of the labor involved in reconfiguring them for
# NSg/C V/J . NSg C/P P D+ NPr/V/Am/Au+ V/J NPr/J/P V NSg/IPl+ C/P
> this particular dataset ) . Thus , it should not be assumed that the results
# I/Ddem+ NSg/J+ NSg . . NSg . NPr/ISg+ VX NSg/C NSg/VX V/J NSg/I/C/Ddem D+ NPl/V+
# NSg/C V/J . NSg C/P P D NPr/V/Am/Au+ V/J NPr/J/P V NSg/IPl+ C/P
> this particular dataset ) . Thus , it should not be assumed that the results
# I/Ddem NSg/J NSg . . NSg . NPr/ISg+ VX NSg/C NSg/VX V/J NSg/I/C/Ddem D+ NPl/V+
> reported here are the best that can be achieved with a given approach ; nor even
# V/J NSg/J/R V D NPr/VX/JS NSg/I/C/Ddem+ NPr/VX NSg/VX V/J P D/P+ NSg/V/J/P+ NSg/V+ . NSg/C NSg/V/J
> the best that have been achieved with a given approach .
@@ -435,6 +435,6 @@
>
#
> In 2014 , a paper reporting using the structure regularization method for
# NPr/J/P # . D/P+ N🅪Sg/V/J+ V V D+ NSg/V+ N🅪Sg NSg/V C/P
> part - of - speech tagging , achieving 97.36 % on a standard benchmark dataset .
# NSg/V/J . P . N🅪Sg/V NSg/V . V # . J/P D/P NSg/J+ NSg/V+ NSg .
# NPr/J/P # . D/P+ N🅪Sg/V/J+ V V D NSg/V+ N🅪Sg NSg/V C/P
> part - of - speech tagging , achieving 97.36 % on a standard benchmark dataset .
# NSg/V/J+ . P . N🅪Sg/V+ NSg/V . V # . J/P D/P NSg/J NSg/V NSg .

View File

@@ -3,15 +3,15 @@
>
#
> This document contains a list of words spelled correctly in some dialects of English , but not American English . This is designed to test the spelling suggestions we give for such mistakes .
# I/Ddem+ NSg/V V D/P NSg/V P NPl/V+ V/J R NPr/J/P I/J/R/Dq NPl P NPr🅪/V/J+ . NSg/C/P NSg/C NPr/J+ NPr🅪/V/J+ . I/Ddem+ VL V/J P NSg/V D+ NSg/V+ NPl+ IPl+ NSg/V C/P NSg/I+ NPl/V+ .
# I/Ddem+ NSg/V+ V D/P NSg/V P NPl/V+ V/J R NPr/J/P I/J/R/Dq NPl P NPr🅪/V/J+ . NSg/C/P NSg/C NPr/J NPr🅪/V/J+ . I/Ddem+ VL V/J P NSg/V D+ NSg/V+ NPl+ IPl+ NSg/V C/P NSg/I+ NPl/V+ .
>
#
> To achieve this , the filename of this file contains `.US , which will tell the snapshot generator to use the American dialect , rather than trying to use an automatically detected dialect .
# P V I/Ddem . D NSg P I/Ddem+ NSg/V+ V Unlintable . I/C+ NPr/VX NPr/V D+ NSg/V+ NSg P NSg/V D+ NPr/J+ NSg+ . NPr/V/J C/P NSg/V/J P NSg/V D/P W? V/J NSg+ .
> To achieve this , the filename of this file contains `.US , which will tell the snapshot generator to use the American dialect , rather than trying to use an automatically detected dialect .
# P V I/Ddem+ . D NSg P I/Ddem NSg/V+ V Unlintable . I/C+ NPr/VX NPr/V D NSg/V+ NSg P NSg/V D NPr/J NSg+ . NPr/V/J C/P NSg/V/J P NSg/V D/P W? V/J NSg+ .
>
#
> Words
# NPl/V
# NPl/V+
>
#
>
@@ -26,36 +26,36 @@
# NSg/V/Comm+ .
>
#
> Labelled .
# V/J/Comm+ .
> Labelled .
# V/J/Comm .
>
#
> Flavour .
# N🅪Sg/V/Comm+ .
>
#
> Favoured .
# V/J/Comm+ .
> Favoured .
# V/J/Comm .
>
#
> Honour .
# N🅪Sg/V/Comm+ .
>
#
> Grey .
# NPr/V/J/Comm+ .
> Grey .
# NPr/V/J/Comm .
>
#
> Quarrelled .
# V/Comm+ .
# V/Comm .
>
#
> Quarrelling .
# NᴹSg/V/Comm+ .
> Quarrelling .
# NᴹSg/V/Comm .
>
#
> Recognised .
# V/J/Au/Br+ .
# V/J/Au/Br .
>
#
> Neighbour .
@@ -63,11 +63,11 @@
>
#
> Neighbouring .
# V/Comm+ .
# V/Comm .
>
#
> Clamour .
# NSg/V/Comm+ .
> Clamour .
# NSg/V/Comm .
>
#
> Theatre .

View File

@@ -3,15 +3,15 @@
>
#
> This document contains example sentences with misspelled words that we want to test the spell checker on .
# I/Ddem+ NSg/V V NSg/V+ NPl/V P V/J+ NPl/V+ NSg/I/C/Ddem+ IPl+ NSg/V P NSg/V D NSg/V NSg/V J/P .
# I/Ddem+ NSg/V+ V NSg/V+ NPl/V+ P V/J NPl/V+ NSg/I/C/Ddem+ IPl+ NSg/V P NSg/V D NSg/V NSg/V J/P .
>
#
> Example Sentences
# NSg/V+ NPl/V
# NSg/V+ NPl/V+
>
#
> My favourite color is blu .
# D$+ NSg/V/J/Comm N🅪Sg/V/J/Am VL+ W? .
> My favourite color is blu .
# D$+ NSg/V/J/Comm+ N🅪Sg/V/J/Am+ VL W? .
> I must defend my honour !
# ISg+ NSg/V NSg/V D$+ N🅪Sg/V/Comm+ .
> I recognize that you recognise me .

View File

@@ -2,17 +2,17 @@
# NPl/V
>
#
> This documents tests that different forms / variations of swears are tagged as such .
# I/Ddem+ NPl/V+ NPl/V+ NSg/I/C/Ddem NSg/J NPl/V . W? P NPl/V V V/J NSg/R NSg/I .
> This documents tests that different forms / variations of swears are tagged as such .
# I/Ddem+ NPl/V+ NPl/V+ NSg/I/C/Ddem NSg/J+ NPl/V+ . W? P NPl/V V V/J NSg/R NSg/I .
>
#
> Examples
# NPl/V
# NPl/V+
>
#
> One turd , two turds .
# NSg/I/V/J+ NSg/V/B . NSg+ NPl/V/B .
> One turd , two turds .
# NSg/I/V/J+ NSg/V+/B . NSg NPl/V/B .
>
#
> I fart , you're farting , he farts , she farted .
# ISg+ NSg/V/B . + V/B . NPr/ISg+ NPl/V/B . ISg+ V/J+/B .
# ISg+ NSg/V/B . + V/B . NPr/ISg+ NPl/V/B . ISg+ V/J/B .

File diff suppressed because it is too large

View File

@@ -1,11 +1,11 @@
> " This " and " that " are common and fulfill multiple purposes in everyday English .
# . I/Ddem+ . V/C . NSg/I/C/Ddem+ . V NSg/V/J V/C V/NoAm NSg/J/Dq NPl/V NPr/J/P NSg/J+ NPr🅪/V/J+ .
> As such , disambiguating them is necessary .
# NSg/R NSg/I . V NSg/IPl+ VL+ NSg/J .
> As such , disambiguating them is necessary .
# NSg/R NSg/I . V NSg/IPl+ VL NSg/J .
>
#
> This document contains various sentences that use " this " , " that " , " these " , and
# I/Ddem+ NSg/V V J NPl/V+ NSg/I/C/Ddem+ NSg/V . I/Ddem+ . . . NSg/I/C/Ddem+ . . . I/Ddem+ . . V/C
> This document contains various sentences that use " this " , " that " , " these " , and
# I/Ddem+ NSg/V+ V J+ NPl/V+ NSg/I/C/Ddem+ NSg/V . I/Ddem+ . . . NSg/I/C/Ddem+ . . . I/Ddem . . V/C
> " those " in different contexts with a lot of edge cases .
# . I/Ddem . NPr/J/P NSg/J NPl/V P D/P NPr/V P NSg/V+ NPl/V+ .
>
@@ -14,54 +14,54 @@
# NPl/V+
>
#
> This triangle is nice .
# I/Ddem+ NSg+ VL+ NPr/V/J+ .
> This is nice .
# I/Ddem+ VL+ NPr/V/J+ .
> That triangle is nice .
# NSg/I/C/Ddem+ NSg+ VL+ NPr/V/J+ .
> That is nice .
# NSg/I/C/Ddem+ VL+ NPr/V/J+ .
> These triangles are nice .
# I/Ddem+ NPl+ V+ NPr/V/J+ .
> These are nice .
# I/Ddem+ V+ NPr/V/J+ .
> Those triangles are nice .
# I/Ddem+ NPl+ V+ NPr/V/J+ .
> This triangle is nice .
# I/Ddem NSg VL NPr/V/J .
> This is nice .
# I/Ddem+ VL NPr/V/J .
> That triangle is nice .
# NSg/I/C/Ddem+ NSg VL NPr/V/J .
> That is nice .
# NSg/I/C/Ddem+ VL NPr/V/J .
> These triangles are nice .
# I/Ddem NPl V NPr/V/J .
> These are nice .
# I/Ddem+ V NPr/V/J .
> Those triangles are nice .
# I/Ddem NPl V NPr/V/J .
> Those are nice .
# I/Ddem+ V+ NPr/V/J .
# I/Ddem+ V NPr/V/J .
>
#
> This massage is nice .
# I/Ddem+ NSg/V+ VL+ NPr/V/J+ .
> That massage is nice .
# NSg/I/C/Ddem NSg/V+ VL+ NPr/V/J+ .
> These massages are nice .
# I/Ddem+ NPl/V+ V+ NPr/V/J+ .
> Those massages are nice .
# I/Ddem+ NPl/V+ V+ NPr/V/J+ .
> This massages well .
# I/Ddem+ NPl/V+ NSg/V/J+ .
> That massages well .
# NSg/I/C/Ddem+ NPl/V+ NSg/V/J+ .
> These massage well .
# I/Ddem+ NSg/V+ NSg/V/J+ .
> Those massage well .
# I/Ddem+ NSg/V+ NSg/V/J+ .
> This massage is nice .
# I/Ddem+ NSg/V+ VL NPr/V/J .
> That massage is nice .
# NSg/I/C/Ddem NSg/V+ VL NPr/V/J .
> These massages are nice .
# I/Ddem+ NPl/V+ V NPr/V/J .
> Those massages are nice .
# I/Ddem+ NPl/V+ V NPr/V/J .
> This massages well .
# I/Ddem+ NPl/V+ NSg/V/J .
> That massages well .
# NSg/I/C/Ddem+ NPl/V+ NSg/V/J .
> These massage well .
# I/Ddem+ NSg/V+ NSg/V/J .
> Those massage well .
# I/Ddem+ NSg/V+ NSg/V/J .
>
#
> That could be a solution .
# NSg/I/C/Ddem+ NSg/VX NSg/VX D/P NSg .
> Find all candidates that could be a solution .
# NSg/V NSg/I/J/C/Dq+ NPl/V+ NSg/I/C/Ddem+ NSg/VX NSg/VX D/P NSg+ .
> That could be a solution .
# NSg/I/C/Ddem+ NSg/VX NSg/VX D/P+ NSg+ .
> Find all candidates that could be a solution .
# NSg/V NSg/I/J/C/Dq+ NPl/V+ NSg/I/C/Ddem+ NSg/VX NSg/VX D/P+ NSg+ .
>
#
> This is all that I have .
# I/Ddem+ VL NSg/I/J/C/Dq NSg/I/C/Ddem ISg+ NSg/VX+ .
> This is all that solutions can do .
# I/Ddem+ VL NSg/I/J/C/Dq NSg/I/C/Ddem NPl+ NPr/VX+ NSg/VX .
> That solution can do .
# NSg/I/C/Ddem NSg+ NPr/VX+ NSg/VX .
> This is all that I have .
# I/Ddem+ VL NSg/I/J/C/Dq NSg/I/C/Ddem ISg+ NSg/VX .
> This is all that solutions can do .
# I/Ddem+ VL NSg/I/J/C/Dq NSg/I/C/Ddem NPl+ NPr/VX NSg/VX .
> That solution can do .
# NSg/I/C/Ddem NSg+ NPr/VX NSg/VX .
>
#
> We can do this !

View File

@@ -39,7 +39,7 @@ struct Args {
// Setting worker threads to four means the process will use about five threads total
// This is because worker threads do not include blocking threads
#[tokio::main(worker_threads = 4)]
#[tokio::main(worker_threads = 1)]
async fn main() -> anyhow::Result<()> {
let subscriber = FmtSubscriber::builder()
.map_writer(move |_| stderr)

View File

@@ -15,8 +15,13 @@ serde = { version = "1.0.219", features = ["derive"] }
is-macro = "0.3.7"
rayon = { version = "1.10.0", optional = true }
rand = { version = "0.9.1", optional = true }
burn = { version = "0.18.0", default-features = false, features = ["std"] }
burn-ndarray = { version = "0.18.0", default-features = false }
serde_json = "1.0.140"
itertools = "0.14.0"
lru = "0.16.0"
[features]
default = []
threaded = ["dep:rayon"]
training = ["dep:rand"]
training = ["dep:rand", "burn/train", "burn/autodiff"]

View File

@@ -0,0 +1,399 @@
use crate::{UPOS, chunker::Chunker};
#[cfg(feature = "training")]
use burn::backend::Autodiff;
#[cfg(feature = "training")]
use burn::nn::loss::{MseLoss, Reduction};
use burn::nn::{Dropout, DropoutConfig};
#[cfg(feature = "training")]
use burn::optim::{GradientsParams, Optimizer};
use burn::record::{FullPrecisionSettings, NamedMpkBytesRecorder, NamedMpkFileRecorder, Recorder};
use burn::tensor::TensorData;
#[cfg(feature = "training")]
use burn::tensor::backend::AutodiffBackend;
use burn::{
module::Module,
nn::{BiLstmConfig, EmbeddingConfig, LinearConfig},
tensor::{Int, Tensor, backend::Backend},
};
use burn_ndarray::{NdArray, NdArrayDevice};
use hashbrown::HashMap;
use std::path::Path;
const UNK_IDX: usize = 1;
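/// A small per-token noun-phrase scorer: word and UPOS embeddings are concatenated,
/// fed through a bidirectional LSTM, and projected down to a single logit per token.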
#[derive(Module, Debug)]
struct NpModel<B: Backend> {
embedding_words: burn::nn::Embedding<B>,
embedding_upos: burn::nn::Embedding<B>,
lstm: burn::nn::BiLstm<B>,
linear_out: burn::nn::Linear<B>,
dropout: Dropout,
}
impl<B: Backend> NpModel<B> {
fn new(vocab: usize, word_embed_dim: usize, dropout: f32, device: &B::Device) -> Self {
let upos_embed = 8;
let total_embed = word_embed_dim + upos_embed;
Self {
embedding_words: EmbeddingConfig::new(vocab, word_embed_dim).init(device),
embedding_upos: EmbeddingConfig::new(20, upos_embed).init(device),
lstm: BiLstmConfig::new(total_embed, total_embed, false).init(device),
// Multiply by two because the BiLSTM concatenates its forward and backward passes, doubling the hidden output size
linear_out: LinearConfig::new(total_embed * 2, 1).init(device),
dropout: DropoutConfig::new(dropout as f64).init(),
}
}
fn forward(
&self,
word_tens: Tensor<B, 2, Int>,
tag_tens: Tensor<B, 2, Int>,
use_dropout: bool,
) -> Tensor<B, 2> {
let word_embed = self.embedding_words.forward(word_tens);
let tag_embed = self.embedding_upos.forward(tag_tens);
let mut x = Tensor::cat(vec![word_embed, tag_embed], 2);
if use_dropout {
x = self.dropout.forward(x);
}
let (mut x, _) = self.lstm.forward(x, None);
if use_dropout {
x = self.dropout.forward(x);
}
let x = self.linear_out.forward(x);
x.squeeze::<2>(2)
}
}
pub struct BurnChunker<B: Backend> {
vocab: HashMap<String, usize>,
model: NpModel<B>,
device: B::Device,
}
impl<B: Backend> BurnChunker<B> {
fn idx(&self, tok: &str) -> usize {
*self.vocab.get(tok).unwrap_or(&UNK_IDX)
}
fn to_tensors(
&self,
sent: &[String],
tags: &[Option<UPOS>],
) -> (Tensor<B, 2, Int>, Tensor<B, 2, Int>) {
// Build parallel index tensors for the word tokens and their UPOS tags
let idxs: Vec<_> = sent.iter().map(|t| self.idx(t) as i32).collect();
let upos: Vec<_> = tags
.iter()
.map(|t| t.map(|o| o as i32 + 2).unwrap_or(1))
.collect();
let word_tensor =
Tensor::<B, 1, Int>::from_data(TensorData::from(idxs.as_slice()), &self.device)
.reshape([1, sent.len()]);
let tag_tensor =
Tensor::<B, 1, Int>::from_data(TensorData::from(upos.as_slice()), &self.device)
.reshape([1, sent.len()]);
(word_tensor, tag_tensor)
}
pub fn save_to(&self, dir: impl AsRef<Path>) {
let dir = dir.as_ref();
std::fs::create_dir_all(dir).unwrap();
let recorder = NamedMpkFileRecorder::<FullPrecisionSettings>::new();
self.model
.clone()
.save_file(dir.join("model.mpk"), &recorder)
.unwrap();
let vocab_bytes = serde_json::to_vec(&self.vocab).unwrap();
std::fs::write(dir.join("vocab.json"), vocab_bytes).unwrap();
}
pub fn load_from_bytes(
model_bytes: impl AsRef<[u8]>,
vocab_bytes: impl AsRef<[u8]>,
embed_dim: usize,
dropout: f32,
device: B::Device,
) -> Self {
let vocab: HashMap<String, usize> = serde_json::from_slice(vocab_bytes.as_ref()).unwrap();
let recorder = NamedMpkBytesRecorder::<FullPrecisionSettings>::new();
let owned_data = model_bytes.as_ref().to_vec();
let record = recorder.load(owned_data, &device).unwrap();
let model = NpModel::new(vocab.len(), embed_dim, dropout, &device);
let model = model.load_record(record);
Self {
vocab,
model,
device,
}
}
}
#[cfg(feature = "training")]
struct ExtractedSentences(
Vec<Vec<String>>,
Vec<Vec<Option<UPOS>>>,
Vec<Vec<bool>>,
HashMap<String, usize>,
);
#[cfg(feature = "training")]
impl<B: Backend + AutodiffBackend> BurnChunker<B> {
fn to_label(&self, labels: &[bool]) -> Tensor<B, 2> {
let ys: Vec<_> = labels.iter().map(|b| if *b { 1. } else { 0. }).collect();
Tensor::<B, 1, _>::from_data(TensorData::from(ys.as_slice()), &self.device)
.reshape([1, labels.len()])
}
pub fn train(
training_files: &[impl AsRef<Path>],
test_file: &impl AsRef<Path>,
word_embed_dim: usize,
dropout: f32,
epochs: usize,
lr: f64,
device: B::Device,
) -> Self {
use burn::tensor::cast::ToElement;
println!("Preparing datasets...");
let ExtractedSentences(sents, tags, labs, vocab) =
Self::extract_sents_from_files(training_files);
println!("Preparing model and training config...");
let mut model = NpModel::<B>::new(vocab.len(), word_embed_dim, dropout, &device);
let opt_config = burn::optim::AdamConfig::new();
let mut opt = opt_config.init();
let util = BurnChunker {
vocab: vocab.clone(),
model: model.clone(),
device: device.clone(),
};
let loss_fn = MseLoss::new();
let mut last_score = 0.;
println!("Training...");
for _ in 0..epochs {
let mut total_loss = 0.;
let mut total_tokens = 0;
let mut total_correct: usize = 0;
for (i, ((x, w), y)) in sents.iter().zip(tags.iter()).zip(labs.iter()).enumerate() {
let (word_tens, tag_tens) = util.to_tensors(x, w);
let y_tensor = util.to_label(y);
let logits = model.forward(word_tens, tag_tens, true);
total_correct += logits
.to_data()
.iter()
.map(|p: f32| p > 0.5)
.zip(y)
.map(|(a, b)| if a == *b { 1 } else { 0 })
.sum::<usize>();
let loss = loss_fn.forward(logits, y_tensor, Reduction::Mean);
let grads = loss.backward();
let grads = GradientsParams::from_grads(grads, &model);
model = opt.step(lr, model, grads);
total_loss += loss.into_scalar().to_f64();
total_tokens += x.len();
if i % 1000 == 0 {
println!("{i}/{}", sents.len());
}
}
println!(
"Average loss for epoch: {}",
total_loss / sents.len() as f64 * 100.
);
println!(
"{}% correct in training dataset",
total_correct as f32 / total_tokens as f32 * 100.
);
let score = util.score_model(&model, test_file);
println!("{}% correct in test dataset", score * 100.);
if score < last_score {
println!("Overfitting detected. Stopping...");
break;
}
last_score = score;
}
Self {
vocab,
model,
device,
}
}
fn score_model(&self, model: &NpModel<B>, dataset: &impl AsRef<Path>) -> f32 {
let ExtractedSentences(sents, tags, labs, _) = Self::extract_sents_from_files(&[dataset]);
let mut total_tokens = 0;
let mut total_correct: usize = 0;
for ((x, w), y) in sents.iter().zip(tags.iter()).zip(labs.iter()) {
let (word_tens, tag_tens) = self.to_tensors(x, w);
let logits = model.forward(word_tens, tag_tens, false);
total_correct += logits
.to_data()
.iter()
.map(|p: f32| p > 0.5)
.zip(y)
.map(|(a, b)| if a == *b { 1 } else { 0 })
.sum::<usize>();
total_tokens += x.len();
}
total_correct as f32 / total_tokens as f32
}
fn extract_sents_from_files(files: &[impl AsRef<Path>]) -> ExtractedSentences {
use super::np_extraction::locate_noun_phrases_in_sent;
use crate::conllu_utils::iter_sentences_in_conllu;
let mut vocab: HashMap<String, usize> = HashMap::new();
vocab.insert("<UNK>".into(), UNK_IDX);
let mut sents: Vec<Vec<String>> = Vec::new();
let mut sent_tags: Vec<Vec<Option<UPOS>>> = Vec::new();
let mut labs: Vec<Vec<bool>> = Vec::new();
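// CoNLL-U treebanks split contractions ("n't", "'ll", ...) into separate tokens;
// they are merged back onto the preceding token below, presumably so the training
// data lines up with Harper's own tokenization.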
const CONTRACTIONS: &[&str] = &["sn't", "n't", "'ll", "'ve", "'re", "'d", "'m", "'s"];
for file in files {
for sent in iter_sentences_in_conllu(file) {
let spans = locate_noun_phrases_in_sent(&sent);
let mut original_mask = vec![false; sent.tokens.len()];
for span in spans {
for i in span {
original_mask[i] = true;
}
}
let mut toks: Vec<String> = Vec::new();
let mut tags: Vec<Option<UPOS>> = Vec::new();
let mut mask: Vec<bool> = Vec::new();
for (idx, tok) in sent.tokens.iter().enumerate() {
let is_contraction = CONTRACTIONS.contains(&&tok.form[..]);
if is_contraction && !toks.is_empty() {
let prev_tok = toks.pop().unwrap();
let prev_mask = mask.pop().unwrap();
toks.push(format!("{prev_tok}{}", tok.form));
mask.push(prev_mask || original_mask[idx]);
} else {
toks.push(tok.form.clone());
tags.push(tok.upos.and_then(UPOS::from_conllu));
mask.push(original_mask[idx]);
}
}
for t in &toks {
if !vocab.contains_key(t) {
let next = vocab.len();
vocab.insert(t.clone(), next);
}
}
sents.push(toks);
sent_tags.push(tags);
labs.push(mask);
}
}
ExtractedSentences(sents, sent_tags, labs, vocab)
}
}
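// With the `training` feature enabled, the CPU backend is wrapped in `Autodiff` so
// gradients can be recorded; inference-only builds use the plain `NdArray` backend.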
#[cfg(feature = "training")]
pub type BurnChunkerCpu = BurnChunker<burn::backend::Autodiff<NdArray>>;
#[cfg(not(feature = "training"))]
pub type BurnChunkerCpu = BurnChunker<NdArray>;
impl BurnChunkerCpu {
pub fn load_from_bytes_cpu(
model_bytes: impl AsRef<[u8]>,
vocab_bytes: impl AsRef<[u8]>,
embed_dim: usize,
dropout: f32,
) -> Self {
Self::load_from_bytes(
model_bytes,
vocab_bytes,
embed_dim,
dropout,
NdArrayDevice::Cpu,
)
}
}
#[cfg(feature = "training")]
impl BurnChunkerCpu {
pub fn train_cpu(
training_files: &[impl AsRef<Path>],
test_file: &impl AsRef<Path>,
embed_dim: usize,
dropout: f32,
epochs: usize,
lr: f64,
) -> Self {
BurnChunker::<Autodiff<NdArray>>::train(
training_files,
test_file,
embed_dim,
dropout,
epochs,
lr,
NdArrayDevice::Cpu,
)
}
}
impl<B: Backend> Chunker for BurnChunker<B> {
fn chunk_sentence(&self, sentence: &[String], tags: &[Option<UPOS>]) -> Vec<bool> {
// Avoids a divide-by-zero error in the linear layer when the sentence is empty.
if sentence.is_empty() {
return Vec::new();
}
let (word_tens, tag_tens) = self.to_tensors(sentence, tags);
let prob = self.model.forward(word_tens, tag_tens, false);
prob.to_data().iter().map(|p: f32| p > 0.5).collect()
}
}
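A minimal inference sketch for the new type; the byte slices, embedding size (6), and dropout (0.3) below are placeholders that must match whatever the model was trained and serialized with:
use harper_pos_utils::{BurnChunkerCpu, Chunker};
fn chunk_with_burn(model_bytes: &[u8], vocab_bytes: &[u8], sentence: &[String]) -> Vec<bool> {
    // 6 and 0.3 are illustrative; they must equal the training-time embed_dim and dropout.
    let chunker = BurnChunkerCpu::load_from_bytes_cpu(model_bytes, vocab_bytes, 6, 0.3);
    // Without a tagger pass every tag is `None`; those fall back to the unknown index internally.
    let tags = vec![None; sentence.len()];
    // One flag per token: `true` marks tokens inside a noun phrase.
    chunker.chunk_sentence(sentence, &tags)
}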

View File

@@ -0,0 +1,61 @@
use lru::LruCache;
use std::hash::Hash;
use std::num::NonZeroUsize;
use std::sync::Mutex;
use super::Chunker;
use crate::UPOS;
/// Wraps any chunker implementation to add an LRU Cache.
/// Useful for incremental lints.
pub struct CachedChunker<C: Chunker> {
inner: C,
cache: Mutex<LruCache<CacheKey, Vec<bool>>>,
}
impl<C: Chunker> CachedChunker<C> {
pub fn new(inner: C, capacity: NonZeroUsize) -> Self {
Self {
inner,
cache: Mutex::new(LruCache::new(capacity)),
}
}
}
impl<C: Chunker> Chunker for CachedChunker<C> {
fn chunk_sentence(&self, sentence: &[String], tags: &[Option<UPOS>]) -> Vec<bool> {
let key = CacheKey::new(sentence, tags);
// Attempt a cache hit.
// Scoped so the lock guard `read` is dropped as early as possible.
if let Ok(mut read) = self.cache.try_lock() {
if let Some(result) = read.get(&key) {
return result.clone();
}
};
// We don't want to hold the lock since it may take a while to run the chunker.
let result = self.inner.chunk_sentence(sentence, tags);
if let Ok(mut cache) = self.cache.try_lock() {
cache.put(key, result.clone());
}
result
}
}
#[derive(Hash, PartialEq, Eq)]
struct CacheKey {
sentence: Vec<String>,
tags: Vec<Option<UPOS>>,
}
impl CacheKey {
fn new(sentence: &[String], tags: &[Option<UPOS>]) -> Self {
Self {
sentence: sentence.to_vec(),
tags: tags.to_vec(),
}
}
}
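A short sketch of wrapping an existing chunker in the cache; the capacity of 1,000 entries is an arbitrary illustration:
use std::num::NonZeroUsize;
use harper_pos_utils::{CachedChunker, Chunker};
fn with_cache<C: Chunker>(inner: C, sentence: &[String]) -> Vec<bool> {
    // 1,000 cached sentence/tag pairs is an arbitrary illustrative capacity.
    let cached = CachedChunker::new(inner, NonZeroUsize::new(1_000).unwrap());
    let tags = vec![None; sentence.len()];
    // The first call runs `inner` and stores the result; an identical later call
    // is answered from the LRU cache.
    cached.chunk_sentence(sentence, &tags)
}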

View File

@@ -1,11 +1,15 @@
use crate::UPOS;
mod brill_chunker;
mod burn_chunker;
mod cached_chunker;
#[cfg(feature = "training")]
mod np_extraction;
mod upos_freq_dict;
pub use brill_chunker::BrillChunker;
pub use burn_chunker::{BurnChunker, BurnChunkerCpu};
pub use cached_chunker::CachedChunker;
pub use upos_freq_dict::UPOSFreqDict;
/// An implementer of this trait is capable of identifying the noun phrases in a provided sentence.

View File

@@ -7,6 +7,8 @@ mod upos;
#[cfg(feature = "training")]
mod word_counter;
pub use chunker::{BrillChunker, Chunker, UPOSFreqDict};
pub use chunker::{
BrillChunker, BurnChunker, BurnChunkerCpu, CachedChunker, Chunker, UPOSFreqDict,
};
pub use tagger::{BrillTagger, FreqDict, FreqDictBuilder, Tagger};
pub use upos::{UPOS, UPOSIter};

View File

@@ -18,4 +18,5 @@ once_cell = "1.21.3"
serde-wasm-bindgen = "0.6.5"
serde_json = "1.0.141"
serde = { version = "1.0.219", features = ["derive"] }
getrandom = { version = "0.3.3", default-features = false, features = ["wasm_js"] }
harper-stats = { path = "../harper-stats", version = "0.54.0", features = ["js"] }

View File

@@ -3,10 +3,11 @@ format:
cargo fmt
pnpm format
# Build the WebAssembly for a specific target (usually either `web` or `bundler`)
# Build the WebAssembly module
build-wasm:
cd "{{justfile_directory()}}/harper-wasm" && wasm-pack build --target web
#!/usr/bin/env bash
cd "{{justfile_directory()}}/harper-wasm"
RUSTFLAGS='--cfg getrandom_backend="wasm_js"' wasm-pack build --target web
# Build `harper.js` with all size optimizations available.
build-harperjs: build-wasm
@@ -590,6 +591,9 @@ newest-dict-changes *numCommits:
});
});
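# Print the nominal phrases Harper detects in the given text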
getnps a:
cargo run --bin harper-cli -- nominal-phrases "{{a}}"
# Suggest annotations for a potential new property annotation
suggestannotation input:
#! /usr/bin/env node
@@ -630,4 +634,4 @@ suggestannotation input:
} else {
console.log(`None of the characters of "${input}" are available to use for new annotations, and none of them are OK to be moved to make way for new annotations.`);
}
}
}

View File

@@ -35,16 +35,16 @@ chrome.runtime.onInstalled.addListener((details) => {
}
});
let linter: LocalLinter;
getDialect().then(setDialect);
chrome.runtime.onMessage.addListener((request, sender, sendResponse) => {
handleRequest(request).then(sendResponse);
return true;
});
let linter: LocalLinter;
getDialect().then(setDialect);
async function enableDefaultDomains() {
const defaultEnabledDomains = [
'chatgpt.com',
@@ -93,6 +93,8 @@ async function enableDefaultDomains() {
enableDefaultDomains();
function handleRequest(message: Request): Promise<Response> {
console.log(`Handling ${message.kind} request`);
switch (message.kind) {
case 'lint':
return handleLint(message);

View File

@@ -2,9 +2,18 @@ import '@webcomponents/custom-elements';
import $ from 'jquery';
import { isVisible, leafNodes } from '../domUtils';
import LintFramework from '../LintFramework';
import ProtocolClient from '../ProtocolClient';
const fw = new LintFramework();
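// Presumably keeps the extension's background service worker alive by firing a
// trivial lint request every 400 ms.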
const keepAliveCallback = () => {
ProtocolClient.lint('', 'example.com');
setTimeout(keepAliveCallback, 400);
};
keepAliveCallback();
function scan() {
$('textarea:visible').each(function () {
if (this.getAttribute('data-enable-grammarly') == 'false' || this.disabled || this.readOnly) {

View File

@@ -2,4 +2,11 @@ import path from 'path';
import { createFixture } from 'playwright-webextext';
const pathToExtension = path.join(import.meta.dirname, '../build');
export const { test, expect } = createFixture(pathToExtension);
const { test, expect } = createFixture(pathToExtension);
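// Clear the extension's stored state after each test so nothing leaks between test cases.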
test.afterEach(async ({ context }) => {
const bg = context.serviceWorkers()[0] ?? context.backgroundPages()[0];
if (bg) await bg.evaluate(() => chrome?.storage?.local.clear?.());
});
export { test, expect };

View File

@@ -17,6 +17,9 @@ testCanIgnoreTextareaSuggestion(TEST_PAGE_URL);
test('Wraps correctly', async ({ page }) => {
await page.goto(TEST_PAGE_URL);
await page.waitForTimeout(2000);
await page.reload();
const editor = getTextarea(page);
await replaceEditorContent(
editor,
@@ -34,6 +37,9 @@ test('Wraps correctly', async ({ page }) => {
test('Scrolls correctly', async ({ page }) => {
await page.goto(TEST_PAGE_URL);
await page.waitForTimeout(2000);
await page.reload();
const editor = getTextarea(page);
await replaceEditorContent(
editor,

View File

@@ -1,5 +1,10 @@
import { expect, test } from './fixtures';
import { clickHarperHighlight, getLexicalEditor, replaceEditorContent } from './testUtils';
import {
clickHarperHighlight,
getLexicalEditor,
randomString,
replaceEditorContent,
} from './testUtils';
const TEST_PAGE_URL = 'https://playground.lexical.dev/';
@@ -27,7 +32,8 @@ test('Can ignore suggestion.', async ({ page }) => {
await page.goto(TEST_PAGE_URL);
const lexical = getLexicalEditor(page);
await replaceEditorContent(lexical, 'This is an test.');
const cacheSalt = randomString(5);
await replaceEditorContent(lexical, cacheSalt);
await page.waitForTimeout(3000);
@@ -37,6 +43,6 @@ test('Can ignore suggestion.', async ({ page }) => {
await page.waitForTimeout(3000);
// Nothing should change.
expect(lexical).toContainText('This is an test');
expect(lexical).toContainText(cacheSalt);
expect(await clickHarperHighlight(page)).toBe(false);
});

View File

@@ -1,5 +1,10 @@
import { expect, test } from './fixtures';
import { clickHarperHighlight, getProseMirrorEditor, replaceEditorContent } from './testUtils';
import {
clickHarperHighlight,
getProseMirrorEditor,
randomString,
replaceEditorContent,
} from './testUtils';
const TEST_PAGE_URL = 'https://prosemirror.net/';
@@ -27,7 +32,8 @@ test('Can ignore suggestion.', async ({ page }) => {
await page.goto(TEST_PAGE_URL);
const pm = getProseMirrorEditor(page);
await replaceEditorContent(pm, 'This is an test.');
const cacheSalt = randomString(5);
await replaceEditorContent(pm, cacheSalt);
await page.waitForTimeout(3000);
@@ -37,6 +43,6 @@ test('Can ignore suggestion.', async ({ page }) => {
await page.waitForTimeout(3000);
// Nothing should change.
expect(pm).toContainText('This is an test');
expect(pm).toContainText(cacheSalt);
expect(await clickHarperHighlight(page)).toBe(false);
});

View File

@@ -1,5 +1,10 @@
import { expect, test } from './fixtures';
import { clickHarperHighlight, getSlateEditor, replaceEditorContent } from './testUtils';
import {
clickHarperHighlight,
getSlateEditor,
randomString,
replaceEditorContent,
} from './testUtils';
const TEST_PAGE_URL = 'https://slatejs.org';
@@ -27,7 +32,8 @@ test('Can ignore suggestion.', async ({ page }) => {
await page.goto(TEST_PAGE_URL);
const slate = getSlateEditor(page);
await replaceEditorContent(slate, 'This is an test.');
const cacheSalt = randomString(5);
await replaceEditorContent(slate, cacheSalt);
await page.waitForTimeout(3000);
@@ -37,6 +43,6 @@ test('Can ignore suggestion.', async ({ page }) => {
await page.waitForTimeout(3000);
// Nothing should change.
expect(slate).toContainText('This is an test');
expect(slate).toContainText(cacheSalt);
expect(await clickHarperHighlight(page)).toBe(false);
});

View File

@@ -2,6 +2,15 @@ import type { Locator, Page } from '@playwright/test';
import type { Box } from '../src/Box';
import { expect, test } from './fixtures';
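/** Generate a random string of ASCII letters; used in these tests to salt editor content so results are not reused from a cache. */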
export function randomString(length: number): string {
const chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz';
let result = '';
for (let i = 0; i < length; i++) {
result += chars.charAt(Math.floor(Math.random() * chars.length));
}
return result;
}
/** Locate the [`Slate`](https://www.slatejs.org/examples/richtext) editor on the page. */
export function getSlateEditor(page: Page): Locator {
return page.locator('[data-slate-editor="true"]');
@@ -59,9 +68,12 @@ export function getTextarea(page: Page): Locator {
}
export async function testBasicSuggestionTextarea(testPageUrl: string) {
test('Can apply basic suggestion.', async ({ page }) => {
test('Can apply basic suggestion.', async ({ page, context }) => {
await page.goto(testPageUrl);
await page.waitForTimeout(2000);
await page.reload();
const editor = getTextarea(page);
await replaceEditorContent(editor, 'This is an test');
@@ -80,8 +92,13 @@ export async function testCanIgnoreTextareaSuggestion(testPageUrl: string) {
test('Can ignore suggestion.', async ({ page }) => {
await page.goto(testPageUrl);
await page.waitForTimeout(2000);
await page.reload();
const editor = getTextarea(page);
await replaceEditorContent(editor, 'This is an test');
const cacheSalt = randomString(5);
await replaceEditorContent(editor, cacheSalt);
await page.waitForTimeout(6000);
@@ -91,7 +108,7 @@ export async function testCanIgnoreTextareaSuggestion(testPageUrl: string) {
await page.waitForTimeout(3000);
// Nothing should change.
expect(editor).toHaveValue('This is an test');
expect(editor).toHaveValue(cacheSalt);
expect(await clickHarperHighlight(page)).toBe(false);
});
}