From 336c27a2e9dbe655e8229c990735cbc8a77babcf Mon Sep 17 00:00:00 2001 From: oyelowo Date: Wed, 4 Oct 2023 19:05:55 -0600 Subject: [PATCH] Implement define analyzer --- .../src/statements/define_analyzer.rs | 222 ++++++++++++++++++ query-builder/src/statements/mod.rs | 4 + 2 files changed, 226 insertions(+) create mode 100644 query-builder/src/statements/define_analyzer.rs diff --git a/query-builder/src/statements/define_analyzer.rs b/query-builder/src/statements/define_analyzer.rs new file mode 100644 index 0000000000..330bea39a8 --- /dev/null +++ b/query-builder/src/statements/define_analyzer.rs @@ -0,0 +1,222 @@ +/* + * Author: Oyelowo Oyedayo + * Email: oyelowooyedayo@gmail.com + * Copyright (c) 2023 Oyelowo Oyedayo + * Licensed under the MIT license + */ + +use std::fmt::{self, Display}; + +use crate::{BindingsList, Buildable, Erroneous, ErrorList, Parametric, Queryable}; + +#[allow(missing_docs)] +#[derive(Debug, Clone)] +pub enum Tokenizer { + Blank, + Camel, + Class, + Punct, +} + +impl Display for Tokenizer { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Tokenizer::Blank => write!(f, "Blank"), + Tokenizer::Camel => write!(f, "Camel"), + Tokenizer::Class => write!(f, "Class"), + Tokenizer::Punct => write!(f, "Punct"), + } + } +} + +#[allow(missing_docs)] +#[derive(Debug, Clone)] +pub enum SnowballLanguage { + Arabic, + Danish, + Dutch, + English, + French, + German, + Greek, + Hungarian, + Italian, + Norwegian, + Portuguese, + Romanian, + Russian, + Spanish, + Swedish, + Tamil, + Turkish, +} + +impl Display for SnowballLanguage { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + SnowballLanguage::Arabic => write!(f, "arabic"), + SnowballLanguage::Danish => write!(f, "danish"), + SnowballLanguage::Dutch => write!(f, "dutch"), + SnowballLanguage::English => write!(f, "english"), + SnowballLanguage::French => write!(f, "french"), + SnowballLanguage::German => write!(f, "german"), + SnowballLanguage::Greek => write!(f, "greek"), + SnowballLanguage::Hungarian => write!(f, "hungarian"), + SnowballLanguage::Italian => write!(f, "italian"), + SnowballLanguage::Norwegian => write!(f, "norwegian"), + SnowballLanguage::Portuguese => write!(f, "portuguese"), + SnowballLanguage::Romanian => write!(f, "romanian"), + SnowballLanguage::Russian => write!(f, "russian"), + SnowballLanguage::Spanish => write!(f, "spanish"), + SnowballLanguage::Swedish => write!(f, "swedish"), + SnowballLanguage::Tamil => write!(f, "tamil"), + SnowballLanguage::Turkish => write!(f, "turkish"), + } + } +} + +#[allow(missing_docs)] +#[derive(Debug, Clone)] +pub enum AnalyzerFilter { + Ascii, + Edgengram(u32, u32), + Lowercase, + Snowball(SnowballLanguage), + Uppercase, +} + +impl Display for AnalyzerFilter { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + AnalyzerFilter::Ascii => write!(f, "ascii"), + AnalyzerFilter::Edgengram(min, max) => write!(f, "edgengram({}, {})", min, max), + AnalyzerFilter::Lowercase => write!(f, "lowercase"), + AnalyzerFilter::Snowball(lang) => write!(f, "snowball({})", lang), + AnalyzerFilter::Uppercase => write!(f, "uppercase"), + } + } +} + +/// `DEFINE ANALYZER` statement +#[derive(Clone, Debug)] +pub struct DefineAnalyzerStatement { + name: String, + tokenizers: Vec, + filters: Vec, + bindings: BindingsList, + errors: ErrorList, +} + +/// Create a new `DefineAnalyzerStatement` +/// ``` +/// # use surreal_query_builder as surreal_orm; +/// use surreal_orm::statements::{define_analyzer, AnalyzerFilter, SnowballLanguage,Tokenizer}; +/// +/// let mut analyzer = define_analyzer("ascii").tokenizers([Tokenizer::Class]).filters([ +/// AnalyzerFilter::Lowercase, +/// AnalyzerFilter::Ascii, +/// AnalyzerFilter::Edgengram(2, 15), +/// AnalyzerFilter::Snowball(SnowballLanguage::English), +/// ]); +/// ``` +pub fn define_analyzer(name: &str) -> DefineAnalyzerStatement { + DefineAnalyzerStatement { + name: name.to_string(), + tokenizers: vec![], + filters: vec![], + bindings: vec![], + errors: vec![], + } +} + +impl DefineAnalyzerStatement { + /// Add a tokenizer to the analyzer + pub fn tokenizers(mut self, tokenizers: I) -> Self + where + I: IntoIterator, + { + self.tokenizers + .extend(tokenizers.into_iter().collect::>()); + self + } + + /// Add a filter to the analyzer + pub fn filters(mut self, filters: I) -> Self + where + I: IntoIterator, + { + self.filters.extend(filters.into_iter().collect::>()); + self + } +} + +impl Queryable for DefineAnalyzerStatement {} +impl Erroneous for DefineAnalyzerStatement { + fn get_errors(&self) -> ErrorList { + self.errors.to_vec() + } +} + +impl Parametric for DefineAnalyzerStatement { + fn get_bindings(&self) -> BindingsList { + self.bindings.to_vec() + } +} + +impl Buildable for DefineAnalyzerStatement { + fn build(&self) -> String { + let mut query = format!("DEFINE ANALYZER {}", self.name); + + if !self.tokenizers.is_empty() { + let tokenizers_str = self + .tokenizers + .iter() + .map(ToString::to_string) + .collect::>() + .join(","); + query = format!("{query} TOKENIZERS {}", tokenizers_str); + } + + if !self.filters.is_empty() { + let filters_str = self + .filters + .iter() + .map(ToString::to_string) + .collect::>() + .join(","); + query = format!("{query} FILTERS {}", filters_str); + } + + query.push(';'); + query + } +} + +impl Display for DefineAnalyzerStatement { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.build()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_define_analyzer_statement() { + use AnalyzerFilter::*; + use Tokenizer::*; + + let analyzer = define_analyzer("ascii").tokenizers([Class]).filters([ + Lowercase, + Ascii, + Edgengram(2, 15), + Snowball(SnowballLanguage::English), + ]); + + assert_eq!( + analyzer.build(), + "DEFINE ANALYZER ascii TOKENIZERS Class FILTERS lowercase,ascii,edgengram(2, 15),snowball(english);" + ); + } +} diff --git a/query-builder/src/statements/mod.rs b/query-builder/src/statements/mod.rs index c85f98350e..653d817779 100644 --- a/query-builder/src/statements/mod.rs +++ b/query-builder/src/statements/mod.rs @@ -9,6 +9,7 @@ use std::fmt::Display; pub(crate) mod create; pub(crate) mod create_v2; +pub(crate) mod define_analyzer; pub(crate) mod define_database; pub(crate) mod define_event; pub(crate) mod define_field; @@ -50,6 +51,9 @@ pub(crate) mod utils_chain; pub use create::{create, CreateStatement}; pub use create_v2::{create_v2, CreateStatementV2}; +pub use define_analyzer::{ + define_analyzer, AnalyzerFilter, DefineAnalyzerStatement, SnowballLanguage, Tokenizer, +}; pub use define_database::{define_database, DefineDatabaseStatement}; pub use define_event::{define_event, DefineEventStatement}; pub use define_field::{define_field, DefineFieldStatement};