Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

formatting via cargo fmt #15

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 10 additions & 3 deletions src/lexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,10 @@ impl<'a> Lexer<'a> {
token
}

fn chop_while<P>(&mut self, mut predicate: P) -> &'a [char] where P: FnMut(&char) -> bool {
fn chop_while<P>(&mut self, mut predicate: P) -> &'a [char]
where
P: FnMut(&char) -> bool,
{
let mut n = 0;
while n < self.content.len() && predicate(&self.content[n]) {
n += 1;
Expand All @@ -30,15 +33,19 @@ impl<'a> Lexer<'a> {
pub fn next_token(&mut self) -> Option<String> {
self.trim_left();
if self.content.is_empty() {
return None
return None;
}

if self.content[0].is_numeric() {
return Some(self.chop_while(|x| x.is_numeric()).iter().collect());
}

if self.content[0].is_alphabetic() {
let term = self.chop_while(|x| x.is_alphanumeric()).iter().map(|x| x.to_ascii_lowercase()).collect::<String>();
let term = self
.chop_while(|x| x.is_alphanumeric())
.iter()
.map(|x| x.to_ascii_lowercase())
.collect::<String>();
let mut env = crate::snowball::SnowballEnv::create(&term);
crate::snowball::algorithms::english_stemmer::stem(&mut env);
let stemmed_term = env.get_current().to_string();
Expand Down
109 changes: 73 additions & 36 deletions src/main.rs
Original file line number Diff line number Diff line change
@@ -1,38 +1,47 @@
use std::fs::{self, File};
use std::path::{Path};
use xml::reader::{XmlEvent, EventReader};
use xml::common::{Position, TextPosition};
use std::env;
use std::result::Result;
use std::fs::{self, File};
use std::io::{BufReader, BufWriter};
use std::path::Path;
use std::process::ExitCode;
use std::result::Result;
use std::str;
use std::io::{BufReader, BufWriter};
use std::sync::{Arc, Mutex};
use std::thread;
use xml::common::{Position, TextPosition};
use xml::reader::{EventReader, XmlEvent};

mod model;
use model::*;
mod server;
mod lexer;
mod server;
pub mod snowball;

fn parse_entire_txt_file(file_path: &Path) -> Result<String, ()> {
fs::read_to_string(file_path).map_err(|err| {
eprintln!("ERROR: could not open file {file_path}: {err}", file_path = file_path.display());
eprintln!(
"ERROR: could not open file {file_path}: {err}",
file_path = file_path.display()
);
})
}

fn parse_entire_xml_file(file_path: &Path) -> Result<String, ()> {
let file = File::open(file_path).map_err(|err| {
eprintln!("ERROR: could not open file {file_path}: {err}", file_path = file_path.display());
eprintln!(
"ERROR: could not open file {file_path}: {err}",
file_path = file_path.display()
);
})?;
let er = EventReader::new(BufReader::new(file));
let mut content = String::new();
for event in er.into_iter() {
let event = event.map_err(|err| {
let TextPosition {row, column} = err.position();
let TextPosition { row, column } = err.position();
let msg = err.msg();
eprintln!("{file_path}:{row}:{column}: ERROR: {msg}", file_path = file_path.display());
eprintln!(
"{file_path}:{row}:{column}: ERROR: {msg}",
file_path = file_path.display()
);
})?;

if let XmlEvent::Characters(text) = event {
Expand All @@ -44,18 +53,25 @@ fn parse_entire_xml_file(file_path: &Path) -> Result<String, ()> {
}

fn parse_entire_file_by_extension(file_path: &Path) -> Result<String, ()> {
let extension = file_path.extension().ok_or_else(|| {
eprintln!("ERROR: can't detect file type of {file_path} without extension",
file_path = file_path.display());
})?.to_string_lossy();
let extension = file_path
.extension()
.ok_or_else(|| {
eprintln!(
"ERROR: can't detect file type of {file_path} without extension",
file_path = file_path.display()
);
})?
.to_string_lossy();
match extension.as_ref() {
"xhtml" | "xml" => parse_entire_xml_file(file_path),
// TODO: specialized parser for markdown files
"txt" | "md" => parse_entire_txt_file(file_path),
_ => {
eprintln!("ERROR: can't detect file type of {file_path}: unsupported extension {extension}",
file_path = file_path.display(),
extension = extension);
eprintln!(
"ERROR: can't detect file type of {file_path}: unsupported extension {extension}",
file_path = file_path.display(),
extension = extension
);
Err(())
}
}
Expand All @@ -75,30 +91,48 @@ fn save_model_as_json(model: &Model, index_path: &str) -> Result<(), ()> {
Ok(())
}

fn add_folder_to_model(dir_path: &Path, model: Arc<Mutex<Model>>, processed: &mut usize) -> Result<(), ()> {
fn add_folder_to_model(
dir_path: &Path,
model: Arc<Mutex<Model>>,
processed: &mut usize,
) -> Result<(), ()> {
let dir = fs::read_dir(dir_path).map_err(|err| {
eprintln!("ERROR: could not open directory {dir_path} for indexing: {err}",
dir_path = dir_path.display());
eprintln!(
"ERROR: could not open directory {dir_path} for indexing: {err}",
dir_path = dir_path.display()
);
})?;

'next_file: for file in dir {
let file = file.map_err(|err| {
eprintln!("ERROR: could not read next file in directory {dir_path} during indexing: {err}",
dir_path = dir_path.display());
eprintln!(
"ERROR: could not read next file in directory {dir_path} during indexing: {err}",
dir_path = dir_path.display()
);
})?;

let file_path = file.path();
let file_type = file.file_type().map_err(|err| {
eprintln!("ERROR: could not determine type of file {file_path}: {err}",
file_path = file_path.display());
})?;
let last_modified = file.metadata().map_err(|err| {
eprintln!("ERROR: could not get the metadata of file {file_path}: {err}",
file_path = file_path.display());
})?.modified().map_err(|err| {
eprintln!("ERROR: could not get the last modification date of file {file_path}: {err}",
file_path = file_path.display())
eprintln!(
"ERROR: could not determine type of file {file_path}: {err}",
file_path = file_path.display()
);
})?;
let last_modified = file
.metadata()
.map_err(|err| {
eprintln!(
"ERROR: could not get the metadata of file {file_path}: {err}",
file_path = file_path.display()
);
})?
.modified()
.map_err(|err| {
eprintln!(
"ERROR: could not get the last modification date of file {file_path}: {err}",
file_path = file_path.display()
)
})?;

if file_type.is_dir() {
add_folder_to_model(&file_path, Arc::clone(&model), processed)?;
Expand Down Expand Up @@ -162,9 +196,11 @@ fn entry() -> Result<(), ()> {
eprintln!("ERROR: could not open index file {index_path}: {err}");
})?;

model = Arc::new(Mutex::new(serde_json::from_reader(index_file).map_err(|err| {
eprintln!("ERROR: could not parse index file {index_path}: {err}");
})?));
model = Arc::new(Mutex::new(serde_json::from_reader(index_file).map_err(
|err| {
eprintln!("ERROR: could not parse index file {index_path}: {err}");
},
)?));
} else {
model = Arc::new(Mutex::new(Default::default()));
}
Expand All @@ -174,7 +210,8 @@ fn entry() -> Result<(), ()> {
thread::spawn(move || {
let mut processed = 0;
// TODO: what should we do in case indexing thread crashes
add_folder_to_model(Path::new(&dir_path), Arc::clone(&model), &mut processed).unwrap();
add_folder_to_model(Path::new(&dir_path), Arc::clone(&model), &mut processed)
.unwrap();
if processed > 0 {
let model = model.lock().unwrap();
save_model_as_json(&model, index_path).unwrap();
Expand Down
22 changes: 17 additions & 5 deletions src/model.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use std::collections::HashMap;
use std::path::{PathBuf, Path};
use serde::{Deserialize, Serialize};
use super::lexer::Lexer;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::path::{Path, PathBuf};
use std::time::SystemTime;

type DocFreq = HashMap<String, usize>;
Expand Down Expand Up @@ -54,7 +54,12 @@ impl Model {
result
}

pub fn add_document(&mut self, file_path: PathBuf, last_modified: SystemTime, content: &[char]) {
pub fn add_document(
&mut self,
file_path: PathBuf,
last_modified: SystemTime,
content: &[char],
) {
self.remove_document(&file_path);

let mut tf = TermFreq::new();
Expand All @@ -77,7 +82,14 @@ impl Model {
}
}

self.docs.insert(file_path, Doc {count, tf, last_modified});
self.docs.insert(
file_path,
Doc {
count,
tf,
last_modified,
},
);
}
}

Expand Down
31 changes: 17 additions & 14 deletions src/server.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
use std::fs::File;
use std::str;
use std::io;
use std::str;
use std::sync::{Arc, Mutex};

use super::model::*;

use tiny_http::{Server, Request, Response, Header, Method, StatusCode};
use tiny_http::{Header, Method, Request, Response, Server, StatusCode};

fn serve_404(request: Request) -> io::Result<()> {
request.respond(Response::from_string("404").with_status_code(StatusCode(404)))
Expand All @@ -16,7 +16,8 @@ fn serve_500(request: Request) -> io::Result<()> {
}

fn serve_400(request: Request, message: &str) -> io::Result<()> {
request.respond(Response::from_string(format!("400: {message}")).with_status_code(StatusCode(400)))
request
.respond(Response::from_string(format!("400: {message}")).with_status_code(StatusCode(400)))
}

fn serve_static_file(request: Request, file_path: &str, content_type: &str) -> io::Result<()> {
Expand Down Expand Up @@ -61,7 +62,7 @@ fn serve_api_search(model: Arc<Mutex<Model>>, mut request: Request) -> io::Resul
Ok(json) => json,
Err(err) => {
eprintln!("ERROR: could not convert search results to JSON: {err}");
return serve_500(request)
return serve_500(request);
}
};

Expand All @@ -71,21 +72,21 @@ fn serve_api_search(model: Arc<Mutex<Model>>, mut request: Request) -> io::Resul
}

fn serve_request(model: Arc<Mutex<Model>>, request: Request) -> io::Result<()> {
println!("INFO: received request! method: {:?}, url: {:?}", request.method(), request.url());
println!(
"INFO: received request! method: {:?}, url: {:?}",
request.method(),
request.url()
);

match (request.method(), request.url()) {
(Method::Post, "/api/search") => {
serve_api_search(model, request)
}
(Method::Post, "/api/search") => serve_api_search(model, request),
(Method::Get, "/index.js") => {
serve_static_file(request, "index.js", "text/javascript; charset=utf-8")
}
(Method::Get, "/") | (Method::Get, "/index.html") => {
serve_static_file(request, "index.html", "text/html; charset=utf-8")
}
_ => {
serve_404(request)
}
_ => serve_404(request),
}
}

Expand All @@ -97,9 +98,11 @@ pub fn start(address: &str, model: Arc<Mutex<Model>>) -> Result<(), ()> {
println!("INFO: listening at http://{address}/");

for request in server.incoming_requests() {
serve_request(Arc::clone(&model), request).map_err(|err| {
eprintln!("ERROR: could not serve the response: {err}");
}).ok(); // <- don't stop on errors, keep serving
serve_request(Arc::clone(&model), request)
.map_err(|err| {
eprintln!("ERROR: could not serve the response: {err}");
})
.ok(); // <- don't stop on errors, keep serving
}

eprintln!("ERROR: the server socket has shutdown");
Expand Down
15 changes: 6 additions & 9 deletions src/snowball/algorithms/english_stemmer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
#![allow(unused_mut)]
#![allow(unused_parens)]
#![allow(unused_variables)]
use crate::snowball::SnowballEnv;
use crate::snowball::Among;
use crate::snowball::SnowballEnv;

static A_0: &'static [Among<Context>; 3] = &[
Among("arsen", -1, -1, None),
Expand Down Expand Up @@ -114,10 +114,7 @@ static A_7: &'static [Among<Context>; 18] = &[
Among("ement", 16, 1, None),
];

static A_8: &'static [Among<Context>; 2] = &[
Among("e", -1, 1, None),
Among("l", -1, 2, None),
];
static A_8: &'static [Among<Context>; 2] = &[Among("e", -1, 1, None), Among("l", -1, 2, None)];

static A_9: &'static [Among<Context>; 8] = &[
Among("succeed", -1, -1, None),
Expand Down Expand Up @@ -195,7 +192,7 @@ fn r_prelude(env: &mut SnowballEnv, context: &mut Context) -> bool {
env.cursor = v_2;
let v_3 = env.cursor;
'lab2: loop {
'replab3: loop{
'replab3: loop {
let v_4 = env.cursor;
'lab4: for _ in 0..1 {
'golab5: loop {
Expand Down Expand Up @@ -336,14 +333,14 @@ fn r_shortv(env: &mut SnowballEnv, context: &mut Context) -> bool {
}

fn r_R1(env: &mut SnowballEnv, context: &mut Context) -> bool {
if !(context.i_p1 <= env.cursor){
if !(context.i_p1 <= env.cursor) {
return false;
}
return true;
}

fn r_R2(env: &mut SnowballEnv, context: &mut Context) -> bool {
if !(context.i_p2 <= env.cursor){
if !(context.i_p2 <= env.cursor) {
return false;
}
return true;
Expand Down Expand Up @@ -803,7 +800,7 @@ fn r_postlude(env: &mut SnowballEnv, context: &mut Context) -> bool {
if !context.b_Y_found {
return false;
}
'replab0: loop{
'replab0: loop {
let v_1 = env.cursor;
'lab1: for _ in 0..1 {
'golab2: loop {
Expand Down
Loading