Skip to content

Commit

Permalink
ability to create sets and tags, still wip
Browse files Browse the repository at this point in the history
  • Loading branch information
avoonix committed Jun 29, 2024
1 parent dfad26d commit 3f1f919
Show file tree
Hide file tree
Showing 34 changed files with 2,429 additions and 687 deletions.
6 changes: 3 additions & 3 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion fuzzle/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,9 @@ panic = "deny"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]

teloxide = { version = "0.12", features = ["macros", "ctrlc_handler", "rustls", "throttle"], default-features = false, git = "https://github.com/avoonix/teloxide.git", branch = "future-bot-api" }

actix-files = { version = "0.6" }
actix-web = { version = "4", features = ["macros"] }
anyhow = { version = "1.0.75" }
Expand All @@ -93,7 +96,6 @@ flate2 = { version = "1.0.28" }
reqwest = { version = "0.11.27", default-features = false, features = ["rustls", "blocking", "hyper-rustls", "__rustls", "__tls", "rustls-tls"] }
serde = { version = "1.0.192", features = ["derive"] }
tokio = { version = "1.33.0", features = ["macros", "rt-multi-thread", "tracing"] }
teloxide = { version = "0.12", features = ["macros", "ctrlc_handler", "rustls", "throttle"], default-features = false, git = "https://github.com/avoonix/teloxide.git", branch = "default-parse-mode-request-public" }
futures = { version = "0.3.29" }
serde_json = "1.0.108"
itertools = "0.13.0"
Expand Down
3 changes: 3 additions & 0 deletions fuzzle/migrations/2_tags/down.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
-- Down migration for 2_tags: removes the tag table and the created_by_user_id column on sticker_set.
-- IF EXISTS keeps this statement from failing when the tag table is already gone.
DROP TABLE IF EXISTS tag;

-- NOTE(review): up.sql declares this column with a REFERENCES user(id) clause. If this migration
-- runs on SQLite, ALTER TABLE ... DROP COLUMN is documented to fail for a column that is used in a
-- foreign key constraint; confirm the target database and test this rollback before relying on it.
ALTER TABLE sticker_set DROP COLUMN created_by_user_id;
10 changes: 10 additions & 0 deletions fuzzle/migrations/2_tags/up.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
-- Up migration for 2_tags: creates the tag table and adds a created_by_user_id column to sticker_set.
-- IF NOT EXISTS makes the CREATE a no-op when a table named tag is already present.
CREATE TABLE IF NOT EXISTS tag (
-- Text primary key.
id TEXT NOT NULL PRIMARY KEY,
-- Required integer; the meaning of the individual values is not defined in this migration.
category INTEGER NOT NULL,
-- Boolean restricted to 0/1 by the CHECK; defaults to 1 when no value is supplied.
is_pending BOOLEAN NOT NULL CHECK (is_pending IN (0, 1)) DEFAULT 1,
-- Optional text payload; its format is not specified here.
dynamic_data TEXT NULL,
-- Optional reference to user(id). With foreign keys enforced, RESTRICT rejects changing the id of,
-- or deleting, a user row that is still referenced from here.
created_by_user_id INTEGER NULL REFERENCES user(id) ON UPDATE RESTRICT ON DELETE RESTRICT,
-- Filled with the current timestamp when the insert does not provide a value.
created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
);

-- The new column is nullable and has no default, so existing sticker_set rows get NULL.
-- It carries the same RESTRICT foreign key to user(id) as tag.created_by_user_id.
ALTER TABLE sticker_set ADD COLUMN created_by_user_id INTEGER NULL REFERENCES user(id) ON UPDATE RESTRICT ON DELETE RESTRICT;
11 changes: 5 additions & 6 deletions fuzzle/src/background_tasks/background.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,23 +7,22 @@ use crate::{
use crate::bot::report_periodic_task_error;

pub trait BackgroundTaskExt {
async fn process_sticker_set(&self, set_name: String);
async fn process_sticker_set(&self, set_name: String, ignore_last_fetched: bool);
async fn process_set_of_sticker(&self, sticker_unique_id: String);
async fn analyze_sticker(&self, sticker_unique_id: String);
}

impl BackgroundTaskExt for RequestContext {
async fn process_sticker_set(&self, set_name: String) {
async fn process_sticker_set(&self, set_name: String, ignore_last_fetched: bool) {
// TODO: retry on error
// TODO: add parameter ignore_last_fetched
let bot = self.bot.clone();
let admin_id = self.config.get_admin_user_id();
let database = self.database.clone();
let request_context = self.clone();
let span = tracing::info_span!("spawned_process_sticker_set");
tokio::spawn(async move {
let result =
import_all_stickers_from_set(&set_name, false, bot.clone(), database.clone(), request_context.config.clone(), request_context.vector_db.clone()).await;
import_all_stickers_from_set(&set_name, ignore_last_fetched, bot.clone(), database.clone(), request_context.config.clone(), request_context.vector_db.clone()).await;
report_periodic_task_error(result);
}.instrument(span));
}
Expand All @@ -40,7 +39,7 @@ impl BackgroundTaskExt for RequestContext {
Ok(Some(sticker_set)) => sticker_set.id,
Ok(None) => return warn!("sticker without set {sticker_unique_id}"),
Err(err) => {
tracing::error!("error while getting sticker set name: {err}");
tracing::error!("error while getting sticker set name: {err:?}");
return;
}
};
Expand All @@ -66,7 +65,7 @@ impl BackgroundTaskExt for RequestContext {
Ok(None) => return,
Ok(Some(analysis)) => analysis,
Err(err) => {
tracing::error!("database error while getting file info: {err}");
tracing::error!("database error while getting file info: {err:?}");
return;
}
};
Expand Down
7 changes: 5 additions & 2 deletions fuzzle/src/background_tasks/periodic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ use crate::database::Database;
use crate::inference::text_to_clip_embedding;
use crate::message::send_database_export_to_chat;
use crate::qdrant::VectorDatabase;
use crate::simple_bot_api;
use crate::sticker::import_all_stickers_from_set;
use crate::tags::TagManager;
use crate::Config;
Expand Down Expand Up @@ -41,14 +42,14 @@ pub fn start_periodic_tasks(
let vector_db = vector_db_clone.clone();
tokio::spawn(async move {
loop {
sleep(Duration::minutes(4).to_std().expect("no overflow")).await;
sleep(Duration::minutes(5).to_std().expect("no overflow")).await;
let span = tracing::info_span!("periodic_refetch_stickers");
let bot = bot.clone();
let database = database.clone();
let paths = paths.clone();
let vector_db = vector_db.clone();
async move {
// fetching 69 sets roughly every 5 minutes is about 20000 sets per day
// TODO: make this configurable
let result = refetch_stickers(
69,
database.clone(),
Expand Down Expand Up @@ -217,5 +218,7 @@ async fn refetch_stickers(
)
.await?;
}
let stats = database.get_stats().await?;
simple_bot_api::set_my_short_description(&config.telegram_bot_token, &format!("I organize {} furry sticker sets 💚 {} stickers 💚 {} taggings 💚 uwu", stats.sets, stats.stickers, stats.taggings)).await?;
Ok(())
}
24 changes: 24 additions & 0 deletions fuzzle/src/bot/bot_error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,24 @@ pub enum UserError {

#[error("no results found")]
ListHasZeroResults(String),

#[error("channel has no username")]
ChannelWithoutUsername,

#[error("user has no username or does not allow sharing it")]
UserWithoutUsername,

#[error("invalid start parameter")]
InvalidStartParameter,

#[error("general validation error")]
ValidationError(String),

#[error("vector not found")]
VectorNotFound,

#[error("unique constraint violation")]
AlreadyExists(String),
}

impl InternalError {
Expand All @@ -108,6 +126,12 @@ impl UserError {
UserError::ParseError(position, rest) => (format!("Invalid input at position {position}: {}", rest.chars().take(10).collect::<String>()),UserErrorSeverity::Error),
UserError::TagsNotFound(tags) => (format!("Could not find tags: {}", tags.join(", ")),UserErrorSeverity::Error),
UserError::ListHasZeroResults(name) => (format!("No {name} here :("),UserErrorSeverity::Info),
UserError::ChannelWithoutUsername => ("The channel needs to have a public name.".to_string(), UserErrorSeverity::Error),
UserError::UserWithoutUsername => ("This user doesn't have a username or their privacy settings don't allow me to see it.".to_string(), UserErrorSeverity::Error),
UserError::InvalidStartParameter => ("No idea where to start.".to_string(), UserErrorSeverity::Error),
UserError::ValidationError(description) => (format!("Invalid data: {description}"), UserErrorSeverity::Error),
UserError::VectorNotFound => (format!("Come back later, looks like I'm not done processing this one"), UserErrorSeverity::Error),
UserError::AlreadyExists(name) => (format!("This {name} already exists"),UserErrorSeverity::Error),
}
}
}
Expand Down
13 changes: 9 additions & 4 deletions fuzzle/src/bot/user_meta.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,13 @@ use crate::bot::config::Config;
use crate::database::{Database, User};
use crate::qdrant::VectorDatabase;
use crate::tags::TagManager;
use itertools::Itertools;
use teloxide::prelude::*;
use teloxide::types::ChatKind;

use super::{Bot, BotError, RequestContext};

#[tracing::instrument(skip(update, config, database, tag_manager, bot, tagging_worker))]
#[tracing::instrument(skip(update, config, database, tag_manager, bot, tagging_worker, vector_db))]
pub async fn inject_context(
update: Update,
config: Arc<Config>,
Expand Down Expand Up @@ -50,12 +52,15 @@ async fn get_user(
database: Database,
bot: Bot,
) -> Result<User, BotError> {
// TODO: possibly cache users? TODO: measure how long this function takes
let Some(user) = update.from() else {
let Some(user_id) = (match (update.from(), update.chat()) {
(Some(user), _) => Some(user.id),
(None, Some(chat)) => chat.id.as_user(),
(None, None) => None,
}) else {
return Err(anyhow::anyhow!("user missing from telegram update").into());
};

get_or_create_user(user.id, config, database, bot).await
get_or_create_user(user_id, config, database, bot).await
}

pub async fn get_or_create_user(
Expand Down
Loading

0 comments on commit 3f1f919

Please sign in to comment.