From ea2e788ae168cbbb5a6fb0bca900df22629502d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Danie=CC=88l=20de=20Kok?= Date: Tue, 5 Mar 2019 08:28:42 +0100 Subject: [PATCH] Remove code that is unused in this release branch --- finalfrontier/src/deps.rs | 496 ------------------------------- finalfrontier/src/lib.rs | 3 - finalfrontier/src/train_model.rs | 18 -- finalfrontier/src/vocab.rs | 14 +- 4 files changed, 1 insertion(+), 530 deletions(-) delete mode 100644 finalfrontier/src/deps.rs diff --git a/finalfrontier/src/deps.rs b/finalfrontier/src/deps.rs deleted file mode 100644 index e9cf9f0..0000000 --- a/finalfrontier/src/deps.rs +++ /dev/null @@ -1,496 +0,0 @@ -use conllx::graph::{DepGraph, DepTriple}; -use std::mem; - -/// Trait to provide iterators over the path in a tree from `start` to the root. -pub trait PathIter { - fn path_iter(&self, start: usize) -> PathIterator; -} - -impl<'a> PathIter for DepGraph<'a> { - fn path_iter(&self, start: usize) -> PathIterator { - PathIterator { - graph: self, - current: start, - } - } -} - -/// Iterator over the path from the given start node to the root node. -/// -/// The path does not include the start node itself. -pub struct PathIterator<'a, 'b> { - current: usize, - graph: &'a DepGraph<'b>, -} - -impl<'a, 'b> Iterator for PathIterator<'a, 'b> { - type Item = DepTriple<&'b str>; - - fn next(&mut self) -> Option { - if let Some(triple) = self.graph.head(self.current) { - self.current = triple.head(); - Some(triple) - } else { - None - } - } -} - -/// Enum for different types of dependencies. Typed through direction, depth, attached form and label. -#[derive(Clone, Debug, Hash, Eq, PartialEq)] -pub enum Dependency { - /// Typed dependency through Direction (`Regular` and `Inverse`), depth, relation label and form. - Typed { - direction: DependencyDirection, - depth: usize, - dep_label: String, - form: String, - }, - /// Untyped dependency just denoting that there exists any kind of relation. - Untyped(String), -} - -// Constructors for convenience -impl Dependency { - fn regular(depth: usize, dep_label: S, form: T) -> Self - where - S: Into, - T: Into, - { - Dependency::Typed { - direction: DependencyDirection::Regular, - depth, - dep_label: dep_label.into(), - form: form.into(), - } - } - fn inverse(depth: usize, dep_label: S, form: T) -> Self - where - S: Into, - T: Into, - { - Dependency::Typed { - direction: DependencyDirection::Inverse, - depth, - dep_label: dep_label.into(), - form: form.into(), - } - } -} - -/// Enum to denote the direction of a dependency relation. -#[derive(Clone, Copy, Debug, Hash, Eq, PartialEq)] -pub enum DependencyDirection { - /// Inverse relation: relation seen from a dependent to its head. - Inverse, - /// Untyped: Just the attached form. - Regular, -} - -/// Struct to iterate over the dependencies in a `conllx::DepGraph`. -/// -/// Provides `Vec`s of tuples with form `(focus_idx, Dependency)` where focus_idx is the index of the -/// focus token for the Dependency context. -pub struct DependencyIterator<'a> { - max_depth: usize, - cur: usize, - depth: usize, - graph: &'a DepGraph<'a>, - path_iter: PathIterator<'a, 'a>, - buffer: Option<(usize, Dependency)>, -} - -impl<'a> DependencyIterator<'a> { - /// Constructs a new `DependencyIterator` which returns up to `max_depth`-order dependencies. - /// - /// If `max_depth == 0`, all contexts are extracted. - pub fn new(graph: &'a DepGraph<'a>, max_depth: usize) -> Self { - DependencyIterator { - max_depth, - cur: 1, - depth: 0, - buffer: None, - graph, - path_iter: graph.path_iter(1), - } - } - - /// Constructs a `Dependency` context with `DependencyDirection::Inverse` for the token at index - /// `self.cur`. - fn inverse_context(&self, triple: &DepTriple<&str>, depth: usize) -> Dependency { - let rel = triple.relation().unwrap_or_default(); - if let Some(token) = self.graph[triple.head()].token() { - Dependency::inverse(depth, rel, token.form()) - } else { - Dependency::inverse(depth, rel, "") - } - } - - /// Constructs a `Dependency` context with `DependencyDirection::Regular` for a head-token. - fn regular_context(&self, triple: &DepTriple<&str>, depth: usize) -> Dependency { - let rel = triple.relation().unwrap_or_default(); - if let Some(token) = self.graph[triple.dependent()].token() { - Dependency::regular(depth, rel, token.form()) - } else { - Dependency::regular(depth, rel, "") - } - } -} - -impl<'a> Iterator for DependencyIterator<'a> { - type Item = (usize, Dependency); - - fn next(&mut self) -> Option<(usize, Dependency)> { - // possibly return stored regular dependency - if self.buffer.is_some() { - return self.buffer.take(); - } - - // while loop moves through sentence - while self.cur < self.graph.len() { - // climb up the tree one step per next() call - if let Some(triple) = self.path_iter.next() { - if (self.depth == self.max_depth) && (self.max_depth != 0) { - continue; - } - self.depth += 1; - - // guard against int underflow since root idx is 0 - if triple.head() != 0 { - // unwrap is safe here because self.path_iter.next() has to make the same check. - let cur_triple = self.graph.head(self.cur).unwrap(); - // regular dependency is context for the head of the triple but is typed through - // the token at self.cur and its incoming edge - self.buffer = Some(( - triple.head() - 1, - self.regular_context(&cur_triple, self.depth), - )); - } - // inverse dependencies are contexts of self.cur and typed through the head of the - // triple and the outgoing relation of that head - return Some((self.cur - 1, self.inverse_context(&triple, self.depth))); - } - self.cur += 1; - self.depth = 0; - self.path_iter = self.graph.path_iter(self.cur); - } - None - } -} - -/// Trait offering adapters for `DependencyIterator`. -pub trait DepIter: Sized { - /// Normalizes the `form` in `Dependency` through lower-casing. - fn normalized(self) -> Normalized; - /// Maps `Dependency::Typed` to `Dependency::Untyped`. - fn untyped(self) -> Untyped; - /// Removes `Dependency`s with `form == ""` - fn filter_root(self) -> FilterRoot; -} - -impl DepIter for I -where - I: Iterator, -{ - fn normalized(self) -> Normalized { - Normalized { inner: self } - } - fn untyped(self) -> Untyped { - Untyped { inner: self } - } - fn filter_root(self) -> FilterRoot { - FilterRoot { inner: self } - } -} - -/// Adapter for iterators over `(usize, Dependency)` to filter `Dependency`s with `form == ""` -pub struct FilterRoot { - inner: I, -} - -impl Iterator for FilterRoot -where - I: Iterator, -{ - type Item = (usize, Dependency); - - fn next(&mut self) -> Option<(usize, Dependency)> { - while let Some(tuple) = self.inner.next() { - match tuple.1 { - Dependency::Typed { ref form, .. } => { - if form == "" { - continue; - } - } - Dependency::Untyped(ref form) => { - if form == "" { - continue; - } - } - } - return Some(tuple); - } - None - } -} - -/// Adapter for iterators over `(usize, Dependency)` to normalize the `form` in the `Dependency`. -pub struct Normalized { - inner: I, -} - -impl Iterator for Normalized -where - I: Iterator, -{ - type Item = (usize, Dependency); - - fn next(&mut self) -> Option<(usize, Dependency)> { - self.inner - .next() - .map(|mut tuple| { - match tuple.1 { - Dependency::Untyped(ref mut form) => { - let normalized = form.to_lowercase(); - mem::replace(form, normalized); - } - Dependency::Typed { ref mut form, .. } => { - let normalized = form.to_lowercase(); - mem::replace(form, normalized); - } - } - tuple - }) - .take() - } -} - -/// Adapter for iterators over `(usize, Dependency)` to map `Dependency::Typed` to -/// `Dependency::Untyped`. -/// -/// The adapter takes the `form` from the input `Dependency` and wraps it in `Dependency::Untyped`. -pub struct Untyped { - inner: I, -} - -impl Iterator for Untyped -where - I: Iterator, -{ - type Item = (usize, Dependency); - - fn next(&mut self) -> Option<::Item> { - self.inner - .next() - .map(|mut tuple| { - if let Dependency::Typed { form, .. } = tuple.1 { - tuple.1 = Dependency::Untyped(form); - } - tuple - }) - .take() - } -} - -#[cfg(test)] -mod tests { - use std::io::Cursor; - - use conllx::graph::Node; - use conllx::io::{ReadSentence, Reader}; - use deps::DepIter; - use deps::Dependency; - use deps::Dependency::Untyped; - use deps::{DependencyIterator, PathIter}; - - static DEP: &[u8; 143] = b"1 Er a _ _ _ 2 SUBJ _ _\n\ - 2 geht b _ _ _ 0 ROOT _ _\n\ - 3 ins c _ _ _ 2 PP _ _\n\ - 4 Kino d _ _ _ 3 PN _ _\n\ - 5 root2 e _ _ _ 0 ROOT _ _\n\ - 6 dep f _ _ _ 5 DEP"; - - #[test] - fn paths() { - let c = Cursor::new(DEP.to_vec()); - let mut reader = Reader::new(c); - let v = vec![ - vec!["geht".to_string(), "".to_string()], // ER - vec!["".to_string()], // GEHT - vec!["geht".to_string(), "".to_string()], // INS - vec!["ins".to_string(), "geht".to_string(), "".to_string()], // KINO - vec!["".to_string()], //root2 - vec!["root2".to_string(), "".to_string()], //dep - ]; - let sentence = reader.read_sentence().unwrap().unwrap(); - - let g = sentence.dep_graph(); - assert_eq!(g.len() - 1, v.len()); - for (target, node) in v.into_iter().zip(1..g.len()) { - let mut path = g.path_iter(node); - assert_eq!( - path.map(|triple| triple.head()) - .map(|head| match &g[head] { - Node::Token(token) => token.form().to_owned(), - Node::Root => "".to_owned(), - }) - .collect::>(), - target - ); - } - } - - #[test] - pub fn dep_iter_typed_with_root_depth1() { - let c = Cursor::new(DEP.to_vec()); - let mut reader = Reader::new(c); - - let sentence = reader.read_sentence().unwrap().unwrap(); - let target_deps = vec![ - (0, Dependency::inverse(1, "SUBJ", "geht")), - (1, Dependency::regular(1, "SUBJ", "Er")), // er - (1, Dependency::inverse(1, "ROOT", "")), // geht - (2, Dependency::inverse(1, "PP", "geht")), - (1, Dependency::regular(1, "PP", "ins")), // ins - (3, Dependency::inverse(1, "PN", "ins")), - (2, Dependency::regular(1, "PN", "Kino")), // kino - (4, Dependency::inverse(1, "ROOT", "")), // root2 - (5, Dependency::inverse(1, "DEP", "root2")), - (4, Dependency::regular(1, "DEP", "dep")), // dep - ]; - let deps = DependencyIterator::new(&sentence.dep_graph(), 1).collect::>(); - assert_eq!(deps.len(), target_deps.len()); - for (dep, target_dep) in deps.into_iter().zip(target_deps) { - assert_eq!(dep, target_dep); - } - } - - #[test] - pub fn dep_iter_typed_no_root_depth1() { - let c = Cursor::new(DEP.to_vec()); - - let target_deps = vec![ - (0, Dependency::inverse(1, "SUBJ", "geht")), - (1, Dependency::regular(1, "SUBJ", "Er")), - (2, Dependency::inverse(1, "PP", "geht")), - (1, Dependency::regular(1, "PP", "ins")), - (3, Dependency::inverse(1, "PN", "ins")), - (2, Dependency::regular(1, "PN", "Kino")), - (5, Dependency::inverse(1, "DEP", "root2")), - (4, Dependency::regular(1, "DEP", "dep")), - ]; - let mut reader = Reader::new(c); - let sentence = reader.read_sentence().unwrap().unwrap(); - let deps = DependencyIterator::new(&sentence.dep_graph(), 1) - .filter_root() - .collect::>(); - assert_eq!(deps.len(), target_deps.len()); - for (dep, target_dep) in deps.into_iter().zip(target_deps) { - assert_eq!(dep, target_dep); - } - } - - #[test] - pub fn dep_iter_normalized_typed_no_root_depth2() { - let target_deps = vec![ - (0, Dependency::inverse(1, "SUBJ", "geht")), - (1, Dependency::regular(1, "SUBJ", "er")), - (2, Dependency::inverse(1, "PP", "geht")), - (1, Dependency::regular(1, "PP", "ins")), - (3, Dependency::inverse(1, "PN", "ins")), - (2, Dependency::regular(1, "PN", "kino")), - (3, Dependency::inverse(2, "PP", "geht")), - (1, Dependency::regular(2, "PN", "kino")), - (5, Dependency::inverse(1, "DEP", "root2")), - (4, Dependency::regular(1, "DEP", "dep")), - ]; - - let c = Cursor::new(DEP.to_vec()); - let mut reader = Reader::new(c); - - let sentence = reader.read_sentence().unwrap().unwrap(); - - let deps = DependencyIterator::new(&sentence.dep_graph(), 2) - .normalized() - .filter_root() - .collect::>(); - assert_eq!(deps.len(), target_deps.len()); - for (dep, target_dep) in deps.into_iter().zip(target_deps) { - assert_eq!(dep, target_dep); - } - } - - #[test] - pub fn dep_iter_untyped_with_root_depth2() { - let target_deps = vec![ - // reachable from "er" - (0, Untyped("geht".to_string())), - (1, Untyped("er".to_string())), - (0, Untyped("".to_string())), - // reachable from "geht" - (1, Untyped("".to_string())), - // reachable from "ins" - (2, Untyped("geht".to_string())), - (1, Untyped("ins".to_string())), - (2, Untyped("".to_string())), - // reachable from "Kino" - (3, Untyped("ins".to_string())), - (2, Untyped("kino".to_string())), - (3, Untyped("geht".to_string())), - (1, Untyped("kino".to_string())), - // reachable from "root2" - (4, Untyped("".to_string())), - // reachable from "dep" - (5, Untyped("root2".to_string())), - (4, Untyped("dep".to_string())), - (5, Untyped("".to_string())), - ]; - - let c = Cursor::new(DEP.to_vec()); - let mut reader = Reader::new(c); - - let sentence = reader.read_sentence().unwrap().unwrap(); - let deps = DependencyIterator::new(&sentence.dep_graph(), 2) - .normalized() - .untyped() - .collect::>(); - assert_eq!(deps.len(), target_deps.len()); - for (dep, target_dep) in deps.into_iter().zip(target_deps) { - assert_eq!(dep, target_dep); - } - } - - #[test] - pub fn dep_iter_typed_with_root_depth2() { - let target_deps = vec![ - (0, Dependency::inverse(1, "SUBJ", "geht")), - (1, Dependency::regular(1, "SUBJ", "er")), - (0, Dependency::inverse(2, "ROOT", "")), - (1, Dependency::inverse(1, "ROOT", "")), - (2, Dependency::inverse(1, "PP", "geht")), - (1, Dependency::regular(1, "PP", "ins")), - (2, Dependency::inverse(2, "ROOT", "")), - (3, Dependency::inverse(1, "PN", "ins")), - (2, Dependency::regular(1, "PN", "kino")), - (3, Dependency::inverse(2, "PP", "geht")), - (1, Dependency::regular(2, "PN", "kino")), - (4, Dependency::inverse(1, "ROOT", "")), - (5, Dependency::inverse(1, "DEP", "root2")), - (4, Dependency::regular(1, "DEP", "dep")), - (5, Dependency::inverse(2, "ROOT", "")), - ]; - - let c = Cursor::new(DEP.to_vec()); - let mut reader = Reader::new(c); - - let sentence = reader.read_sentence().unwrap().unwrap(); - - let deps = DependencyIterator::new(&sentence.dep_graph(), 2) - .normalized() - .collect::>(); - assert_eq!(deps.len(), target_deps.len()); - for (dep, target_dep) in deps.into_iter().zip(target_deps) { - assert_eq!(dep, target_dep); - } - } -} diff --git a/finalfrontier/src/lib.rs b/finalfrontier/src/lib.rs index c9847e6..0be78e6 100644 --- a/finalfrontier/src/lib.rs +++ b/finalfrontier/src/lib.rs @@ -45,9 +45,6 @@ extern crate zipf; mod config; pub use config::{Config, LossType, ModelType}; -mod deps; -pub(crate) use deps::{DepIter, Dependency, DependencyIterator, PathIter}; - mod io; pub use io::{SentenceIterator, WriteModelBinary, WriteModelText, WriteModelWord2Vec}; diff --git a/finalfrontier/src/train_model.rs b/finalfrontier/src/train_model.rs index 7319967..9443e4d 100644 --- a/finalfrontier/src/train_model.rs +++ b/finalfrontier/src/train_model.rs @@ -100,12 +100,6 @@ impl TrainModel where { embed } - /// Get the input embedding with the given index. - #[inline] - pub(crate) fn input_embedding(&self, idx: usize) -> ArrayView1 { - self.input.subview(Axis(0), idx) - } - /// Get the input embedding with the given index mutably. #[inline] pub(crate) fn input_embedding_mut(&mut self, idx: usize) -> ArrayViewMut1 { @@ -224,18 +218,6 @@ mod tests { output, }; - // Input embeddings - assert!(all_close( - model.input_embedding(0).as_slice().unwrap(), - &[1., 2., 3.], - 1e-5 - )); - assert!(all_close( - model.input_embedding(1).as_slice().unwrap(), - &[4., 5., 6.], - 1e-5 - )); - // Mutable input embeddings assert!(all_close( model.input_embedding_mut(0).as_slice().unwrap(), diff --git a/finalfrontier/src/vocab.rs b/finalfrontier/src/vocab.rs index 307d108..cb13362 100644 --- a/finalfrontier/src/vocab.rs +++ b/finalfrontier/src/vocab.rs @@ -169,15 +169,6 @@ where discards, } } - - /// Get a specific context - pub fn get(&self, context: &Q) -> Option<&CountedType> - where - T: Borrow, - Q: Hash + ?Sized + Eq, - { - self.idx(context).map(|idx| &self.types[idx]) - } } /// Trait for lookup of indices. @@ -552,12 +543,9 @@ mod tests { let vocab: SimpleVocab<&str> = builder.into(); assert_eq!(vocab.len(), 3); - assert_eq!(vocab.get("c"), None); assert_eq!(vocab.n_types(), 18); - let a = vocab.get("a").unwrap(); - assert_eq!("a", a.label); - assert_eq!(5, a.count()); + // 0.0001 / 5/18 + (0.0001 / 5/18).sqrt() = 0.019334 assert!(util::close( 0.019334,