From 3c705d03d4f3fa4ec0bd1c9749d0d09a67fa9bec Mon Sep 17 00:00:00 2001 From: Anush008 Date: Tue, 8 Aug 2023 11:16:46 +0530 Subject: [PATCH 1/7] refactor: migrate to ChromaDB --- .env.example | 2 +- Cargo.lock | 298 +++++------------------------------- Cargo.toml | 3 +- src/constants.rs | 4 - src/conversation/mod.rs | 4 +- src/conversation/prompts.rs | 4 +- src/db/chroma.rs | 104 +++++++++++++ src/db/mod.rs | 15 +- src/db/qdrant.rs | 130 ---------------- src/main.rs | 2 +- src/routes/mod.rs | 26 ++-- src/utils/functions.rs | 5 +- 12 files changed, 176 insertions(+), 421 deletions(-) create mode 100644 src/db/chroma.rs delete mode 100644 src/db/qdrant.rs diff --git a/.env.example b/.env.example index 1394037..0ff1466 100644 --- a/.env.example +++ b/.env.example @@ -1,4 +1,4 @@ OPENAI_API_KEY= -QDRANT_URL= #Defaults to http://localhost:6334 +CHROMA_URL= #Defaults to http://localhost:8000 RUST_LOG= #Logging levels: "error", "warn", "info", "debug", "trace" WEBSERVER_PORT= #Defaults to 3000 \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock index 5d6064d..51541ea 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -390,28 +390,6 @@ version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bddcadddf5e9015d310179a59bb28c4d4b9920ad0f11e8e14dbadf654890c9a6" -[[package]] -name = "async-stream" -version = "0.3.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd56dd203fef61ac097dd65721a419ddccb106b2d2b70ba60a6b529f03961a51" -dependencies = [ - "async-stream-impl", - "futures-core", - "pin-project-lite", -] - -[[package]] -name = "async-stream-impl" -version = "0.3.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "16e62a023e7c117e27523144c5d2459f4397fcc3cab0085af8e2224f643a0193" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.28", -] - [[package]] name = "async-trait" version = "0.1.72" @@ -441,51 +419,6 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" -[[package]] -name = "axum" -version = "0.6.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a6a1de45611fdb535bfde7b7de4fd54f4fd2b17b1737c0a59b69bf9b92074b8c" -dependencies = [ - "async-trait", - "axum-core", - "bitflags 1.3.2", - "bytes", - "futures-util", - "http", - "http-body", - "hyper", - "itoa", - "matchit", - "memchr", - "mime", - "percent-encoding", - "pin-project-lite", - "rustversion", - "serde", - "sync_wrapper", - "tower", - "tower-layer", - "tower-service", -] - -[[package]] -name = "axum-core" -version = "0.3.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "759fa577a247914fd3f7f76d62972792636412fbfd634cd452f6a385a74d2d2c" -dependencies = [ - "async-trait", - "bytes", - "futures-util", - "http", - "http-body", - "mime", - "rustversion", - "tower-layer", - "tower-service", -] - [[package]] name = "backtrace" version = "0.3.68" @@ -647,6 +580,18 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "chromadb" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cea5a918d6cfd50106ad67ae5015fb5022361cdf81ee3b1d49f75ef3d918d350" +dependencies = [ + "anyhow", + "minreq", + "serde", + "serde_json", +] + [[package]] name = "cipher" version = "0.4.4" @@ -1178,7 +1123,6 @@ dependencies = [ "futures-core", "futures-io", "futures-macro", - "futures-sink", "futures-task", "memchr", "pin-project-lite", @@ -1354,23 +1298,11 @@ dependencies = [ "futures-util", "http", "hyper", - "rustls", + "rustls 0.21.6", "tokio", "tokio-rustls", ] -[[package]] -name = "hyper-timeout" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbb958482e8c7be4bc3cf272a766a2b0bf1a6755e7a6ae777f017a31d11b13b1" -dependencies = [ - "hyper", - "pin-project-lite", - "tokio", - "tokio-io-timeout", -] - [[package]] name = "hyper-tls" version = "0.5.0" @@ -1620,12 +1552,6 @@ version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "58093314a45e00c77d5c508f76e77c3396afbbc0d01506e7fae47b018bac2b1d" -[[package]] -name = "matchit" -version = "0.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67827e6ea8ee8a7c4a72227ef4fc08957040acffdb5f122733b24fa12daff41b" - [[package]] name = "matrixmultiply" version = "0.3.7" @@ -1678,6 +1604,21 @@ dependencies = [ "adler", ] +[[package]] +name = "minreq" +version = "2.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3de406eeb24aba36ed3829532fa01649129677186b44a49debec0ec574ca7da7" +dependencies = [ + "log", + "once_cell", + "rustls 0.20.8", + "serde", + "serde_json", + "webpki", + "webpki-roots 0.22.6", +] + [[package]] name = "mio" version = "0.8.8" @@ -1848,13 +1789,12 @@ dependencies = [ "actix-web", "actix-web-lab", "anyhow", - "async-trait", + "chromadb", "dotenv", "env_logger", "ndarray", "openai-api-rs", "ort", - "qdrant-client", "rayon", "reqwest", "rust-fuzzy-search", @@ -2055,54 +1995,6 @@ dependencies = [ "unicode-ident", ] -[[package]] -name = "prost" -version = "0.11.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b82eaa1d779e9a4bc1c3217db8ffbeabaae1dca241bf70183242128d48681cd" -dependencies = [ - "bytes", - "prost-derive", -] - -[[package]] -name = "prost-derive" -version = "0.11.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5d2d8d10f3c6ded6da8b05b5fb3b8a5082514344d56c9f871412d29b4e075b4" -dependencies = [ - "anyhow", - "itertools 0.10.5", - "proc-macro2", - "quote", - "syn 1.0.109", -] - -[[package]] -name = "prost-types" -version = "0.11.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "213622a1460818959ac1181aaeb2dc9c7f63df720db7d788b3e24eacd1983e13" -dependencies = [ - "prost", -] - -[[package]] -name = "qdrant-client" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94dc0c49ac6893463bd32f5adfe9e503c3d83fac7ee58534e1c0e1c76842dac8" -dependencies = [ - "anyhow", - "futures-util", - "prost", - "prost-types", - "reqwest", - "serde", - "serde_json", - "tonic", -] - [[package]] name = "quote" version = "1.0.32" @@ -2270,7 +2162,7 @@ dependencies = [ "once_cell", "percent-encoding", "pin-project-lite", - "rustls", + "rustls 0.21.6", "rustls-pemfile", "serde", "serde_json", @@ -2278,12 +2170,10 @@ dependencies = [ "tokio", "tokio-native-tls", "tokio-rustls", - "tokio-util", "tower-service", "url", "wasm-bindgen", "wasm-bindgen-futures", - "wasm-streams", "web-sys", "webpki-roots 0.22.6", "winreg", @@ -2340,26 +2230,26 @@ dependencies = [ [[package]] name = "rustls" -version = "0.21.6" +version = "0.20.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d1feddffcfcc0b33f5c6ce9a29e341e4cd59c3f78e7ee45f4a40c038b1d6cbb" +checksum = "fff78fc74d175294f4e83b28343315ffcfb114b156f0185e9741cb5570f50e2f" dependencies = [ "log", "ring", - "rustls-webpki 0.101.2", "sct", + "webpki", ] [[package]] -name = "rustls-native-certs" -version = "0.6.3" +name = "rustls" +version = "0.21.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9aace74cb666635c918e9c12bc0d348266037aa8eb599b5cba565709a8dff00" +checksum = "1d1feddffcfcc0b33f5c6ce9a29e341e4cd59c3f78e7ee45f4a40c038b1d6cbb" dependencies = [ - "openssl-probe", - "rustls-pemfile", - "schannel", - "security-framework", + "log", + "ring", + "rustls-webpki 0.101.2", + "sct", ] [[package]] @@ -2391,12 +2281,6 @@ dependencies = [ "untrusted", ] -[[package]] -name = "rustversion" -version = "1.0.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ffc183a10b4478d04cbbbfc96d0873219d962dd5accaff2ffbd4ceb7df837f4" - [[package]] name = "ryu" version = "1.0.15" @@ -2621,12 +2505,6 @@ dependencies = [ "unicode-ident", ] -[[package]] -name = "sync_wrapper" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160" - [[package]] name = "tar" version = "0.4.39" @@ -2793,16 +2671,6 @@ dependencies = [ "windows-sys 0.48.0", ] -[[package]] -name = "tokio-io-timeout" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30b74022ada614a1b4834de765f9bb43877f910cc8ce4be40e89042c9223a8bf" -dependencies = [ - "pin-project-lite", - "tokio", -] - [[package]] name = "tokio-macros" version = "2.1.0" @@ -2830,18 +2698,7 @@ version = "0.24.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081" dependencies = [ - "rustls", - "tokio", -] - -[[package]] -name = "tokio-stream" -version = "0.1.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "397c988d37662c7dda6d2208364a706264bf3d6138b11d436cbac0ad38832842" -dependencies = [ - "futures-core", - "pin-project-lite", + "rustls 0.21.6", "tokio", ] @@ -2859,64 +2716,6 @@ dependencies = [ "tracing", ] -[[package]] -name = "tonic" -version = "0.9.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3082666a3a6433f7f511c7192923fa1fe07c69332d3c6a2e6bb040b569199d5a" -dependencies = [ - "async-stream", - "async-trait", - "axum", - "base64 0.21.2", - "bytes", - "futures-core", - "futures-util", - "h2", - "http", - "http-body", - "hyper", - "hyper-timeout", - "percent-encoding", - "pin-project", - "prost", - "rustls-native-certs", - "rustls-pemfile", - "tokio", - "tokio-rustls", - "tokio-stream", - "tower", - "tower-layer", - "tower-service", - "tracing", -] - -[[package]] -name = "tower" -version = "0.4.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8fa9be0de6cf49e536ce1851f987bd21a43b771b09473c3549a6c853db37c1c" -dependencies = [ - "futures-core", - "futures-util", - "indexmap 1.9.3", - "pin-project", - "pin-project-lite", - "rand", - "slab", - "tokio", - "tokio-util", - "tower-layer", - "tower-service", - "tracing", -] - -[[package]] -name = "tower-layer" -version = "0.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c20c8dbed6283a09604c3e69b4b7eeb54e298b8a600d4d5ecb5ad39de609f1d0" - [[package]] name = "tower-service" version = "0.3.2" @@ -3043,7 +2842,7 @@ dependencies = [ "base64 0.21.2", "log", "once_cell", - "rustls", + "rustls 0.21.6", "rustls-webpki 0.100.1", "url", "webpki-roots 0.23.1", @@ -3188,19 +2987,6 @@ version = "0.2.87" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ca6ad05a4870b2bf5fe995117d3728437bd27d7cd5f06f13c17443ef369775a1" -[[package]] -name = "wasm-streams" -version = "0.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6bbae3363c08332cadccd13b67db371814cd214c2524020932f0804b8cf7c078" -dependencies = [ - "futures-util", - "js-sys", - "wasm-bindgen", - "wasm-bindgen-futures", - "web-sys", -] - [[package]] name = "web-sys" version = "0.3.64" diff --git a/Cargo.toml b/Cargo.toml index 78c4933..e0a3048 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,11 +11,9 @@ repository = "https://github.com/open-sauced/repo-query" [dependencies] actix-web = "4" anyhow = "1" -async-trait = "0.1" dotenv = "0.15" ndarray = "0.15" ort = { version = "1", features = ["load-dynamic"] } -qdrant-client = "1" rayon = "1" reqwest = { version = "0.11", default-features = false, features = ["rustls-tls"] } serde = "1" @@ -31,3 +29,4 @@ tracing-actix-web = "0.7" env_logger = "0.10" tokio = { version = "1", default-features = false } actix-cors = "0.6.4" +chromadb = "0.3" diff --git a/src/constants.rs b/src/constants.rs index 6cb5f3e..203ec1e 100644 --- a/src/constants.rs +++ b/src/constants.rs @@ -2,10 +2,6 @@ use std::ops::RangeInclusive; // Env var defaults pub const WEBSERVER_PORT_DEFAULT: &str = "3000"; -pub const QDRANT_URL_DEFAULT: &str = "http://localhost:6334"; - -//Embeddings -pub const EMBEDDINGS_DIMENSION: usize = 384; //Actix-web pub const SSE_CHANNEL_BUFFER_SIZE: usize = 1; diff --git a/src/conversation/mod.rs b/src/conversation/mod.rs index e80067c..f4c7fb1 100644 --- a/src/conversation/mod.rs +++ b/src/conversation/mod.rs @@ -54,7 +54,7 @@ impl Conversation { ChatCompletionMessage { name: None, function_call: None, - role: MessageRole::system, + role: MessageRole::user, content: system_message(), }, ChatCompletionMessage { @@ -80,7 +80,7 @@ impl Conversation { self.messages[0] = ChatCompletionMessage { name: None, function_call: None, - role: MessageRole::system, + role: MessageRole::user, content: answer_generation_prompt(), } } diff --git a/src/conversation/prompts.rs b/src/conversation/prompts.rs index 5fad03f..af13eed 100644 --- a/src/conversation/prompts.rs +++ b/src/conversation/prompts.rs @@ -131,9 +131,9 @@ Follow these rules at all times: pub fn answer_generation_prompt() -> String { String::from( r#"Your job is to answer a user query about a GitHub repository's codebase. -Given is the history of the function calls made by you to retrieve all relevant information from the repository and their responses +Given is the history of the function calls made by you to retrieve all relevant information about a user's query from the repository Follow these rules at all times: -- Use the information from the function calls to generate a response +- Use only the relevant information from the function calls to generate a response - Do NOT assume the structure of the codebase, or the existence of files or folders - Each function response has path information that you can use to cite the source - The user's query includes the repository information to which the query pertains diff --git a/src/db/chroma.rs b/src/db/chroma.rs new file mode 100644 index 0000000..9071ffd --- /dev/null +++ b/src/db/chroma.rs @@ -0,0 +1,104 @@ +use chromadb::v1::{ + collection::{CollectionEntries, GetOptions, QueryOptions}, + ChromaClient, +}; + +use crate::{ + constants::MAX_FILES_COUNT, + embeddings::Embeddings, + github::{Repository, RepositoryEmbeddings, RepositoryFilePaths}, + prelude::*, +}; + +use super::RepositoryEmbeddingsDB; + +pub struct ChromaDB { + client: ChromaClient, +} + +impl ChromaDB { + pub fn initialize() -> Result { + let client = ChromaClient::new(Default::default()); + Ok(ChromaDB { client }) + } +} + +impl RepositoryEmbeddingsDB for ChromaDB { + fn insert_repo_embeddings(&self, repo: RepositoryEmbeddings) -> Result<()> { + let collection = self.client.get_or_create_collection(&repo.repo_id, None)?; + let collection_entries = CollectionEntries { + //Save the file paths as ids Eg: src/pages/index.js + ids: repo + .file_embeddings + .iter() + .map(|fe| fe.path.as_str()) + .collect(), + embeddings: Some( + repo.file_embeddings + .iter() + .map(|fe| fe.embeddings.clone()) + .collect(), + ), + metadatas: None, + documents: None, + }; + collection.upsert(collection_entries, None)?; + Ok(()) + } + + fn get_relevant_file_paths( + &self, + repository: &Repository, + query_embeddings: Embeddings, + limit: usize, + ) -> Result { + let collection = self + .client + .get_or_create_collection(&repository.to_string(), None)?; + let query_options = QueryOptions { + n_results: Some(limit), + query_embeddings: Some(vec![query_embeddings]), + query_texts: None, + where_document: None, + where_metadata: None, + // We don't need the documents, embeddings, distances, or metadata. Only the ids. + include: Some(vec![]), + }; + + let results = collection.query(query_options, None)?; + + Ok(RepositoryFilePaths { + repo_id: repository.to_string(), + file_paths: results.ids[0].clone(), + }) + } + + fn get_file_paths(&self, repository: &Repository) -> Result { + let collection = self + .client + .get_or_create_collection(&repository.to_string(), None)?; + + let get_options = GetOptions { + include: Some(vec![]), + limit: Some(MAX_FILES_COUNT), + offset: None, + ids: vec![], + where_document: None, + where_metadata: None, + }; + + let results = collection.get(get_options)?; + + Ok(RepositoryFilePaths { + repo_id: repository.to_string(), + file_paths: results.ids, + }) + } + + fn is_indexed(&self, repository: &Repository) -> Result { + match self.client.get_collection(&repository.to_string()) { + Ok(_) => Ok(true), + Err(_) => Ok(false), + } + } +} diff --git a/src/db/mod.rs b/src/db/mod.rs index f9c2837..861fa5f 100644 --- a/src/db/mod.rs +++ b/src/db/mod.rs @@ -1,23 +1,22 @@ use crate::embeddings::Embeddings; use crate::github::{Repository, RepositoryEmbeddings, RepositoryFilePaths}; use crate::prelude::*; -mod qdrant; -use async_trait::async_trait; -pub use qdrant::*; +mod chroma; + +pub use chroma::*; -#[async_trait] pub trait RepositoryEmbeddingsDB { - async fn insert_repo_embeddings(&self, repo: RepositoryEmbeddings) -> Result<()>; + fn insert_repo_embeddings(&self, repo: RepositoryEmbeddings) -> Result<()>; - async fn get_relevant_files( + fn get_relevant_file_paths( &self, repository: &Repository, query_embeddings: Embeddings, limit: usize, ) -> Result; - async fn get_file_paths(&self, repository: &Repository) -> Result; + fn get_file_paths(&self, repository: &Repository) -> Result; - async fn is_indexed(&self, repository: &Repository) -> Result; + fn is_indexed(&self, repository: &Repository) -> Result; } diff --git a/src/db/qdrant.rs b/src/db/qdrant.rs deleted file mode 100644 index 16090f9..0000000 --- a/src/db/qdrant.rs +++ /dev/null @@ -1,130 +0,0 @@ -use std::collections::HashMap; - -use super::RepositoryEmbeddingsDB; -use crate::{ - constants::{EMBEDDINGS_DIMENSION, MAX_FILES_COUNT, QDRANT_URL_DEFAULT}, - embeddings::Embeddings, - github::{FileEmbeddings, Repository, RepositoryEmbeddings, RepositoryFilePaths}, - prelude::*, -}; -use anyhow::Ok; -use async_trait::async_trait; -use qdrant_client::{ - prelude::*, - qdrant::{vectors_config::Config, ScrollPoints, VectorParams, VectorsConfig}, -}; -use rayon::prelude::*; - -pub struct QdrantDB { - client: QdrantClient, -} - -#[async_trait] -impl RepositoryEmbeddingsDB for QdrantDB { - async fn insert_repo_embeddings(&self, repo: RepositoryEmbeddings) -> Result<()> { - if self.client.has_collection(&repo.repo_id).await? { - self.client.delete_collection(&repo.repo_id).await?; - } - self.client - .create_collection(&CreateCollection { - collection_name: repo.repo_id.clone(), - vectors_config: Some(VectorsConfig { - config: Some(Config::Params(VectorParams { - size: EMBEDDINGS_DIMENSION as u64, - distance: Distance::Cosine.into(), - ..Default::default() - })), - }), - ..Default::default() - }) - .await?; - - let points: Vec = repo - .file_embeddings - .into_par_iter() - .enumerate() - .map(|file| { - let FileEmbeddings { path, embeddings } = file.1; - let payload: Payload = HashMap::from([("path", path.into())]).into(); - - PointStruct::new(file.0 as u64, embeddings, payload) - }) - .collect(); - self.client - .upsert_points(repo.repo_id, points, None) - .await?; - Ok(()) - } - - async fn get_relevant_files( - &self, - repository: &Repository, - query_embeddings: Embeddings, - limit: usize, - ) -> Result { - let search_response = self - .client - .search_points(&SearchPoints { - collection_name: repository.to_string(), - vector: query_embeddings, - with_payload: Some(true.into()), - limit: limit as u64, - ..Default::default() - }) - .await?; - let paths: Vec = search_response - .result - .into_iter() - .map(|point| point.payload["path"].to_string().replace('\"', "")) - .collect(); - Ok(RepositoryFilePaths { - repo_id: repository.to_string(), - file_paths: paths, - }) - } - - async fn get_file_paths(&self, repository: &Repository) -> Result { - let scroll_reponse = self - .client - .scroll(&ScrollPoints { - collection_name: repository.to_string(), - offset: None, - filter: None, - limit: Some(MAX_FILES_COUNT as u32), - with_payload: Some(true.into()), - with_vectors: None, - read_consistency: None, - }) - .await?; - - let file_paths: Vec = scroll_reponse - .result - .par_iter() - .map(|point| point.payload["path"].to_string().replace('\"', "")) - .collect(); - Ok(RepositoryFilePaths { - repo_id: repository.to_string(), - file_paths, - }) - } - - async fn is_indexed(&self, repository: &Repository) -> Result { - self.client.has_collection(repository.to_string()).await - } -} - -impl QdrantDB { - pub fn initialize() -> Result { - let mut qdrant_url = - std::env::var("QDRANT_URL").unwrap_or(String::from(QDRANT_URL_DEFAULT)); - dbg!(&qdrant_url); - - if qdrant_url.is_empty() { - qdrant_url = QDRANT_URL_DEFAULT.to_string(); - } - - let config = QdrantClientConfig::from_url(&qdrant_url); - let client = QdrantClient::new(Some(config))?; - Ok(QdrantDB { client }) - } -} diff --git a/src/main.rs b/src/main.rs index 28ddf73..7a3ba15 100644 --- a/src/main.rs +++ b/src/main.rs @@ -21,7 +21,7 @@ async fn main() -> std::io::Result<()> { env_logger::init_from_env(Env::default().default_filter_or("info")); let model: Arc = Arc::new(embeddings::Onnx::new(Path::new("model")).unwrap()); - let db: Arc = Arc::new(db::QdrantDB::initialize().unwrap()); + let db: Arc = Arc::new(db::ChromaDB::initialize().unwrap()); let mut port = std::env::var("WEBSERVER_PORT").unwrap_or(WEBSERVER_PORT_DEFAULT.into()); if port.is_empty() { diff --git a/src/routes/mod.rs b/src/routes/mod.rs index 23c9fdc..4843786 100644 --- a/src/routes/mod.rs +++ b/src/routes/mod.rs @@ -1,11 +1,14 @@ #![allow(unused_must_use)] pub mod events; -use crate::constants::SSE_CHANNEL_BUFFER_SIZE; +use crate::{ + constants::SSE_CHANNEL_BUFFER_SIZE, + conversation::{Conversation, Query}, + db::{ChromaDB, RepositoryEmbeddingsDB}, + embeddings::Onnx, + github::{embed_repo, fetch_license_info, fetch_repo_files, Repository}, + routes::events::QueryEvent, +}; -use crate::conversation::{Conversation, Query}; -use crate::github::{fetch_license_info, fetch_repo_files}; -use crate::routes::events::QueryEvent; -use crate::{db::RepositoryEmbeddingsDB, github::Repository}; use actix_web::web::Query as ActixQuery; use actix_web::HttpResponse; use actix_web::{ @@ -18,13 +21,12 @@ use actix_web_lab::sse; use serde_json::json; use std::sync::Arc; -use crate::{db::QdrantDB, embeddings::Onnx, github::embed_repo}; use events::{emit, EmbedEvent}; #[post("/embed")] async fn embeddings( data: Json, - db: web::Data>, + db: web::Data>, model: web::Data>, ) -> Result { let license_info = fetch_license_info(&data).await.map_err(ErrorBadRequest)?; @@ -51,7 +53,7 @@ async fn embeddings( let embeddings = embed_repo(&repository, files, model.get_ref().as_ref()).await?; emit(&sender, EmbedEvent::SaveEmbeddings(None)).await; - db.get_ref().insert_repo_embeddings(embeddings).await?; + db.get_ref().insert_repo_embeddings(embeddings)?; emit(&sender, EmbedEvent::Done(None)).await; Ok::<(), anyhow::Error>(()) @@ -69,10 +71,10 @@ async fn embeddings( #[post("/query")] async fn query( data: Json, - db: web::Data>, + db: web::Data>, model: web::Data>, ) -> Result { - if db.is_indexed(&data.repository).await.unwrap_or_default() { + if db.is_indexed(&data.repository).unwrap_or_default() { let (sender, rx) = sse::channel(SSE_CHANNEL_BUFFER_SIZE); actix_rt::spawn(async move { @@ -103,9 +105,9 @@ async fn query( #[get("/collection")] async fn repo( data: ActixQuery, - db: web::Data>, + db: web::Data>, ) -> Result { - let is_indexed = db.is_indexed(&data.into_inner()).await.unwrap_or_default(); + let is_indexed = db.is_indexed(&data.into_inner()).unwrap_or_default(); if is_indexed { Ok(HttpResponse::Ok()) diff --git a/src/utils/functions.rs b/src/utils/functions.rs index 3e4ed77..8539709 100644 --- a/src/utils/functions.rs +++ b/src/utils/functions.rs @@ -31,8 +31,7 @@ pub async fn search_codebase( ) -> Result> { let query_embeddings = model.embed(query)?; let relevant_files = db - .get_relevant_files(repository, query_embeddings, files_limit) - .await? + .get_relevant_file_paths(repository, query_embeddings, files_limit)? .file_paths; let mut relevant_chunks: Vec = Vec::new(); for path in relevant_files { @@ -88,7 +87,7 @@ pub async fn search_path( db: &D, limit: usize, ) -> Result> { - let list = db.get_file_paths(repository).await?; + let list = db.get_file_paths(repository)?; let file_paths: Vec<&str> = list.file_paths.iter().map(String::as_ref).collect(); let response: Vec<(&str, f32)> = rust_fuzzy_search::fuzzy_search_best_n(path, &file_paths, limit); From 6d1a6140daf8fff7cd7d41241d8b79fb60d9214f Mon Sep 17 00:00:00 2001 From: Anush008 Date: Tue, 8 Aug 2023 13:18:02 +0530 Subject: [PATCH 2/7] build: add chromadb service --- docker-compose.yaml | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/docker-compose.yaml b/docker-compose.yaml index dab60f7..398ce3f 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -1,11 +1,8 @@ version: '3.1' services: - qdrant: - image: qdrant/qdrant:latest - ports: - - "6333:6333" - - "6334:6334" + chroma: + image: ghcr.io/chroma-core/chroma:0.4.5 repo-query: image: open-sauced-repo-query:latest @@ -13,10 +10,10 @@ services: # Consume env variables from a .env file found in the same directory # as this docker-compose file. See ".env.example" for more details. OPENAI_API_KEY: ${OPENAI_API_KEY} - QDRANT_URL: "http://qdrant:6334" RUST_LOG: "info" + CHROMA_URL: "http://0.0.0.0:8000" WEBSERVER_PORT: ${WEBSERVER_PORT} depends_on: - - qdrant + - chroma ports: - "${WEBSERVER_PORT}:${WEBSERVER_PORT}" From 117a93398a6c771f004d328ac1f39ea546c6ad81 Mon Sep 17 00:00:00 2001 From: Anush008 Date: Tue, 8 Aug 2023 13:18:19 +0530 Subject: [PATCH 3/7] docs: Update README.md --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index fc68b24..3125d3b 100644 --- a/README.md +++ b/README.md @@ -105,7 +105,7 @@ To run the project locally, there are a few prerequisites: - The [Rust toolchain](https://www.rust-lang.org/learn/get-started) - The [Onnx Runtime](https://onnxruntime.ai/docs/install/). Will be downloaded and installed automatically when building the project. -- [Docker](https://docs.docker.com/engine/install/) to run the [QdrantDB](https://qdrant.tech/) instance. +- [Docker](https://docs.docker.com/engine/install/) to run the [ChromaDB](https://www.trychroma.com/) instance. - `make` for easy automation and development workflow Once, the above requirements are satisfied, you can run the project like so: @@ -119,8 +119,8 @@ The project requires the following environment variables to be set. Start Docker and run the following commands to spin-up a Docker container with a QdrantDB image. ``` -docker pull qdrant/qdrant -docker run -p 6333:6333 -p 6334:6334 qdrant/qdrant +docker pull ghcr.io/chroma-core/chroma:0.4.5 +docker run -p 8000:8000 ghcr.io/chroma-core/chroma:0.4.5 ``` The database dashboard will be accessible at [localhost:6333/dashboard](http://localhost:6333/dashboard), the project communicates with the DB on port `6334`. From 343c2bf7f2cadc967693e283bdf9981d0ec3aae2 Mon Sep 17 00:00:00 2001 From: Anush008 Date: Tue, 8 Aug 2023 18:54:15 +0530 Subject: [PATCH 4/7] chore: CHROMA_URL service name --- docker-compose.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker-compose.yaml b/docker-compose.yaml index 398ce3f..7320aea 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -2,7 +2,7 @@ version: '3.1' services: chroma: - image: ghcr.io/chroma-core/chroma:0.4.5 + image: ghcr.io/chroma-core/chroma:0.4.5 # Spins up a ChromaDB instance at port 8000 repo-query: image: open-sauced-repo-query:latest @@ -11,7 +11,7 @@ services: # as this docker-compose file. See ".env.example" for more details. OPENAI_API_KEY: ${OPENAI_API_KEY} RUST_LOG: "info" - CHROMA_URL: "http://0.0.0.0:8000" + CHROMA_URL: "http://chroma:8000" WEBSERVER_PORT: ${WEBSERVER_PORT} depends_on: - chroma From e25826033985dedd4d721b7060bdb79f009fc3f4 Mon Sep 17 00:00:00 2001 From: Anush008 Date: Tue, 8 Aug 2023 19:03:28 +0530 Subject: [PATCH 5/7] chore: Removed default webserver port fallback --- .env.example | 6 +++--- README.md | 5 +++-- src/constants.rs | 3 --- src/main.rs | 7 ++----- 4 files changed, 8 insertions(+), 13 deletions(-) diff --git a/.env.example b/.env.example index 0ff1466..b56445f 100644 --- a/.env.example +++ b/.env.example @@ -1,4 +1,4 @@ -OPENAI_API_KEY= +OPENAI_API_KEY= #REQUIRED +WEBSERVER_PORT= #REQUIRED CHROMA_URL= #Defaults to http://localhost:8000 -RUST_LOG= #Logging levels: "error", "warn", "info", "debug", "trace" -WEBSERVER_PORT= #Defaults to 3000 \ No newline at end of file +RUST_LOG= #Logging levels: "error", "warn", "info", "debug", "trace" \ No newline at end of file diff --git a/README.md b/README.md index 3125d3b..63c1c50 100644 --- a/README.md +++ b/README.md @@ -115,6 +115,8 @@ Once, the above requirements are satisfied, you can run the project like so: The project requires the following environment variables to be set. * [`OPENAI_API_KEY`](https://platform.openai.com/account/api-keys). To authenticate requests to OpenAI. +* [`WEBSERVER_PORT`](https://github.com/open-sauced/repo-query/blob/alpha/.env.example). The port at which Repo-Query listens to requests. + ### Database setup Start Docker and run the following commands to spin-up a Docker container with a QdrantDB image. @@ -122,11 +124,10 @@ Start Docker and run the following commands to spin-up a Docker container with a docker pull ghcr.io/chroma-core/chroma:0.4.5 docker run -p 8000:8000 ghcr.io/chroma-core/chroma:0.4.5 ``` -The database dashboard will be accessible at [localhost:6333/dashboard](http://localhost:6333/dashboard), the project communicates with the DB on port `6334`. ### Running the project -Run the following command to install the dependencies and run the project on port `3000`. +Run the following command to install the dependencies and run the project. ``` cargo run --release diff --git a/src/constants.rs b/src/constants.rs index 203ec1e..21af8c4 100644 --- a/src/constants.rs +++ b/src/constants.rs @@ -1,8 +1,5 @@ use std::ops::RangeInclusive; -// Env var defaults -pub const WEBSERVER_PORT_DEFAULT: &str = "3000"; - //Actix-web pub const SSE_CHANNEL_BUFFER_SIZE: usize = 1; pub const HOME_ROUTE_REDIRECT_URL: &str = "https://opensauced.pizza"; diff --git a/src/main.rs b/src/main.rs index 7a3ba15..1ba31ab 100644 --- a/src/main.rs +++ b/src/main.rs @@ -10,7 +10,7 @@ use std::{path::Path, sync::Arc}; use actix_cors::Cors; use actix_web::{web, App, HttpServer}; -use constants::{HOME_ROUTE_REDIRECT_URL, WEBSERVER_PORT_DEFAULT}; +use constants::HOME_ROUTE_REDIRECT_URL; use env_logger::Env; use tracing_actix_web::TracingLogger; @@ -23,10 +23,7 @@ async fn main() -> std::io::Result<()> { let model: Arc = Arc::new(embeddings::Onnx::new(Path::new("model")).unwrap()); let db: Arc = Arc::new(db::ChromaDB::initialize().unwrap()); - let mut port = std::env::var("WEBSERVER_PORT").unwrap_or(WEBSERVER_PORT_DEFAULT.into()); - if port.is_empty() { - port = WEBSERVER_PORT_DEFAULT.to_string(); - } + let port = std::env::var("WEBSERVER_PORT").expect("WEBSERVER_PORT not set"); let port = port.parse::().expect("Invalid WEBSERVER_PORT"); HttpServer::new(move || { From 37bd46815bc9eb765bb9a271783023c1f1ad9c5c Mon Sep 17 00:00:00 2001 From: Anush008 Date: Sat, 12 Aug 2023 17:18:37 +0530 Subject: [PATCH 6/7] chore: Update deps --- Cargo.lock | 149 ++++++++++++++++++++++++++++------------------------- 1 file changed, 79 insertions(+), 70 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 51541ea..c1692e7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -120,7 +120,7 @@ dependencies = [ "futures-util", "mio", "num_cpus", - "socket2", + "socket2 0.4.9", "tokio", "tracing", ] @@ -182,7 +182,7 @@ dependencies = [ "serde_json", "serde_urlencoded", "smallvec", - "socket2", + "socket2 0.4.9", "time", "url", ] @@ -307,9 +307,9 @@ dependencies = [ [[package]] name = "aho-corasick" -version = "1.0.2" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43f6cb1bf222025340178f382c426f13757b2960e89779dfcb319c32542a5a41" +checksum = "86b8f9420f797f2d9e935edf629310eb938a0d839f984e25327f3c7eed22300c" dependencies = [ "memchr", ] @@ -370,9 +370,9 @@ dependencies = [ [[package]] name = "anstyle-wincon" -version = "1.0.1" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "180abfa45703aebe0093f79badacc01b8fd4ea2e35118747e5811127f926e188" +checksum = "c677ab05e09154296dd37acecd46420c17b9713e8366facafa8fc0885167cf4c" dependencies = [ "anstyle", "windows-sys 0.48.0", @@ -392,9 +392,9 @@ checksum = "bddcadddf5e9015d310179a59bb28c4d4b9920ad0f11e8e14dbadf654890c9a6" [[package]] name = "async-trait" -version = "0.1.72" +version = "0.1.73" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc6dde6e4ed435a4c1ee4e73592f5ba9da2151af10076cc04858746af9352d09" +checksum = "bc00ceb34980c03614e35a3a4e218276a0a824e911d07651cd0d858a51e8c0f0" dependencies = [ "proc-macro2", "quote", @@ -403,14 +403,14 @@ dependencies = [ [[package]] name = "auto_enums" -version = "0.8.1" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "faa44067eaa1097fc513fcdea6b9c42ea8a792f750a181937d52b315477e7b7a" +checksum = "dd4ba50b181a898ce52142184e3a46641002b3b190bf5ef827eb3c578fad4b70" dependencies = [ "derive_utils", "proc-macro2", "quote", - "syn 1.0.109", + "syn 2.0.28", ] [[package]] @@ -460,9 +460,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.3.3" +version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "630be753d4e58660abd17930c71b647fe46c27ea6b63cc59e1e3851406972e42" +checksum = "b4682ae6287fcf752ecaabbfcc7b6f9b72aa33933dc23a554d853aea8eea8635" [[package]] name = "block-buffer" @@ -566,9 +566,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.0.81" +version = "1.0.82" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c6b2562119bf28c3439f7f02db99faf0aa1a8cdfe5772a2ee155d32227239f0" +checksum = "305fe645edc1442a0fa8b6726ba61d422798d37a52e12eaecf4b022ebbb88f01" dependencies = [ "jobserver", "libc", @@ -604,9 +604,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.3.19" +version = "4.3.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5fd304a20bff958a57f04c4e96a2e7594cc4490a0e809cbd48bb6437edaa452d" +checksum = "c27cdf28c0f604ba3f512b0c9a409f8de8513e4816705deb0498b627e7c3a3fd" dependencies = [ "clap_builder", "clap_derive", @@ -615,9 +615,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.3.19" +version = "4.3.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01c6a3f08f1fe5662a35cfe393aec09c4df95f60ee93b7556505260f75eee9e1" +checksum = "08a9f1ab5e9f01a9b81f202e8562eb9a10de70abf9eaeac1be465c28b75aa4aa" dependencies = [ "anstream", "anstyle", @@ -886,13 +886,13 @@ dependencies = [ [[package]] name = "derive_utils" -version = "0.12.0" +version = "0.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7590f99468735a318c254ca9158d0c065aa9b5312896b5a043b5e39bc96f5fa2" +checksum = "9abcad25e9720609ccb3dcdb795d845e37d8ce34183330a9f48b03a1a71c8e21" dependencies = [ "proc-macro2", "quote", - "syn 1.0.109", + "syn 2.0.28", ] [[package]] @@ -1010,13 +1010,13 @@ checksum = "6999dc1837253364c2ebb0704ba97994bd874e8f195d665c50b7548f6ea92764" [[package]] name = "filetime" -version = "0.2.21" +version = "0.2.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5cbc844cecaee9d4443931972e1289c8ff485cb4cc2767cb03ca139ed6885153" +checksum = "d4029edd3e734da6fe05b6cd7bd2960760a616bd2ddd0d59a0124746d6272af0" dependencies = [ "cfg-if", "libc", - "redox_syscall 0.2.16", + "redox_syscall 0.3.5", "windows-sys 0.48.0", ] @@ -1282,7 +1282,7 @@ dependencies = [ "httpdate", "itoa", "pin-project-lite", - "socket2", + "socket2 0.4.9", "tokio", "tower-service", "tracing", @@ -1532,9 +1532,9 @@ dependencies = [ [[package]] name = "log" -version = "0.4.19" +version = "0.4.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b06a4cde4c0f271a446782e3eff8de789548ce57dbc8eca9292c27f4a42004b4" +checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" [[package]] name = "macro_rules_attribute" @@ -1809,9 +1809,9 @@ dependencies = [ [[package]] name = "openai-api-rs" -version = "0.1.12" +version = "0.1.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a63e0d57449b6e1afb37bfbe54f9267fc1f9f9d549bf3e9e48fc524892219128" +checksum = "ae5e17ff92255b12b0859707e944cff46145a16a2d3767fa2f7b87b17e909701" dependencies = [ "reqwest", "serde", @@ -1821,9 +1821,9 @@ dependencies = [ [[package]] name = "openssl" -version = "0.10.55" +version = "0.10.56" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "345df152bc43501c5eb9e4654ff05f794effb78d4efe3d53abc158baddc0703d" +checksum = "729b745ad4a5575dd06a3e1af1414bd330ee561c01b3899eb584baeaa8def17e" dependencies = [ "bitflags 1.3.2", "cfg-if", @@ -1853,9 +1853,9 @@ checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" [[package]] name = "openssl-sys" -version = "0.9.90" +version = "0.9.91" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "374533b0e45f3a7ced10fcaeccca020e66656bc03dac384f852e4e5a7a8104a6" +checksum = "866b5f16f90776b9bb8dc1e1802ac6f0513de3a7a7465867bfbc563dc737faac" dependencies = [ "cc", "libc", @@ -1944,18 +1944,18 @@ checksum = "9b2a4787296e9989611394c33f193f676704af1686e70b8f8033ab5ba9a35a94" [[package]] name = "pin-project" -version = "1.1.2" +version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "030ad2bc4db10a8944cb0d837f158bdfec4d4a4873ab701a95046770d11f8842" +checksum = "fda4ed1c6c173e3fc7a83629421152e01d7b1f9b7f65fb301e490e8cfc656422" dependencies = [ "pin-project-internal", ] [[package]] name = "pin-project-internal" -version = "1.1.2" +version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec2e072ecce94ec471b13398d5402c188e76ac03cf74dd1a975161b23a3f6d9c" +checksum = "4359fd9c9171ec6e8c62926d6faaf553a8dc3f64e1507e76da7911b4f6a04405" dependencies = [ "proc-macro2", "quote", @@ -1964,9 +1964,9 @@ dependencies = [ [[package]] name = "pin-project-lite" -version = "0.2.10" +version = "0.2.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c40d25201921e5ff0c862a505c6557ea88568a4e3ace775ab55e93f2f4f9d57" +checksum = "12cc1b0bf1727a77a54b6654e7b5f1af8604923edc8b81885f8ec92f9e3f0a05" [[package]] name = "pin-utils" @@ -2104,11 +2104,11 @@ dependencies = [ [[package]] name = "regex" -version = "1.9.1" +version = "1.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2eae68fc220f7cf2532e4494aded17545fce192d59cd996e0fe7887f4ceb575" +checksum = "81bc1d4caf89fac26a70747fe603c130093b53c773888797a6329091246d651a" dependencies = [ - "aho-corasick 1.0.2", + "aho-corasick 1.0.3", "memchr", "regex-automata", "regex-syntax 0.7.4", @@ -2116,11 +2116,11 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.3.4" +version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b7b6d6190b7594385f61bd3911cd1be99dfddcfc365a4160cc2ab5bff4aed294" +checksum = "fed1ceff11a1dddaee50c9dc8e4938bd106e9d89ae372f192311e7da498e3b69" dependencies = [ - "aho-corasick 1.0.2", + "aho-corasick 1.0.3", "memchr", "regex-syntax 0.7.4", ] @@ -2217,11 +2217,11 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.6" +version = "0.38.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ee020b1716f0a80e2ace9b03441a749e402e86712f15f16fe8a8f75afac732f" +checksum = "19ed4fa021d81c8392ce04db050a3da9a60299050b7ae1cf482d862b54a7218f" dependencies = [ - "bitflags 2.3.3", + "bitflags 2.4.0", "errno", "libc", "linux-raw-sys", @@ -2248,7 +2248,7 @@ checksum = "1d1feddffcfcc0b33f5c6ce9a29e341e4cd59c3f78e7ee45f4a40c038b1d6cbb" dependencies = [ "log", "ring", - "rustls-webpki 0.101.2", + "rustls-webpki 0.101.3", "sct", ] @@ -2273,9 +2273,9 @@ dependencies = [ [[package]] name = "rustls-webpki" -version = "0.101.2" +version = "0.101.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "513722fd73ad80a71f72b61009ea1b584bcfa1483ca93949c8f290298837fa59" +checksum = "261e9e0888cba427c3316e6322805653c9425240b6fd96cee7cb671ab70ab8d0" dependencies = [ "ring", "untrusted", @@ -2343,18 +2343,18 @@ checksum = "b0293b4b29daaf487284529cc2f5675b8e57c61f70167ba415a463651fd6a918" [[package]] name = "serde" -version = "1.0.180" +version = "1.0.183" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ea67f183f058fe88a4e3ec6e2788e003840893b91bac4559cabedd00863b3ed" +checksum = "32ac8da02677876d532745a130fc9d8e6edfa81a269b107c5b00829b91d8eb3c" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.180" +version = "1.0.183" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "24e744d7782b686ab3b73267ef05697159cc0e5abbed3f47f9933165e5219036" +checksum = "aafe972d60b0b9bee71a91b92fee2d4fb3c9d7e8f6b179aa99f27203d99a4816" dependencies = [ "proc-macro2", "quote", @@ -2453,6 +2453,16 @@ dependencies = [ "winapi", ] +[[package]] +name = "socket2" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2538b18701741680e0322a2302176d3253a35388e2e62f172f64f4f16605f877" +dependencies = [ + "libc", + "windows-sys 0.48.0", +] + [[package]] name = "spin" version = "0.5.2" @@ -2507,9 +2517,9 @@ dependencies = [ [[package]] name = "tar" -version = "0.4.39" +version = "0.4.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec96d2ffad078296368d46ff1cb309be1c23c513b4ab0e22a45de0185275ac96" +checksum = "b16afcea1f22891c49a00c751c7b63b2233284064f11a200fc624137c51e2ddb" dependencies = [ "filetime", "libc", @@ -2518,9 +2528,9 @@ dependencies = [ [[package]] name = "tempfile" -version = "3.7.0" +version = "3.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5486094ee78b2e5038a6382ed7645bc084dc2ec433426ca4c3cb61e2007b8998" +checksum = "dc02fddf48964c42031a0b3fe0428320ecf3a73c401040fc0096f97794310651" dependencies = [ "cfg-if", "fastrand", @@ -2540,9 +2550,9 @@ dependencies = [ [[package]] name = "text-splitter" -version = "0.4.2" +version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62a5c046e622fc8f2d16754acde6e2a752885cb4b7af7bf47454cf633d9bb57c" +checksum = "27e155cf42d0862836a461b9740411748457436df9d94f48458c71138f77d91c" dependencies = [ "auto_enums", "either", @@ -2653,11 +2663,10 @@ dependencies = [ [[package]] name = "tokio" -version = "1.29.1" +version = "1.30.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "532826ff75199d5833b9d2c5fe410f29235e25704ee5f0ef599fb51c21f4a4da" +checksum = "2d3ce25f50619af8b0aec2eb23deebe84249e19e2ddd393a6e16e3300a6dadfd" dependencies = [ - "autocfg", "backtrace", "bytes", "libc", @@ -2666,7 +2675,7 @@ dependencies = [ "parking_lot", "pin-project-lite", "signal-hook-registry", - "socket2", + "socket2 0.5.3", "tokio-macros", "windows-sys 0.48.0", ] @@ -2737,9 +2746,9 @@ dependencies = [ [[package]] name = "tracing-actix-web" -version = "0.7.5" +version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce52ffaf2d544e317d3bef63f49a6a22022866505fa4840a4339b1756834a2a9" +checksum = "5c0b08ce08cbde6a96fc1e4ebb8132053e53ec7a5cd27eef93ede6b73ebbda06" dependencies = [ "actix-web", "pin-project", @@ -3199,9 +3208,9 @@ dependencies = [ [[package]] name = "xattr" -version = "0.2.3" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d1526bbe5aaeb5eb06885f4d987bcdfa5e23187055de9b83fe00156a821fabc" +checksum = "f4686009f71ff3e5c4dbcf1a282d0a44db3f021ba69350cd42086b3e5f1c6985" dependencies = [ "libc", ] From cc7f60612d1e11c7baa9bbd618c7109df45d8325 Mon Sep 17 00:00:00 2001 From: Anush Date: Sun, 13 Aug 2023 15:23:13 +0530 Subject: [PATCH 7/7] docs: Update API reference README.md --- README.md | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 63c1c50..3552ddf 100644 --- a/README.md +++ b/README.md @@ -22,17 +22,24 @@ ## 🔎 The Project RepoQuery is an early-beta project, that uses recursive [OpenAI function calling](https://platform.openai.com/docs/api-reference/chat/create#chat/create-functions) paired with semantic search using [multi-qa-MiniLM-L6-cos-v1](https://huggingface.co/rawsh/multi-qa-MiniLM-distill-onnx-L6-cos-v1/blob/main/onnx/model_quantized.onnx) to index and answer user queries about public GitHub repositories. -## 📬 Service Endpoints +## 📬 Service Endpoints > **Note:** -Since the service returns responses as SSEs, a REST client like Postman is recommended. Download it [here](https://www.postman.com/downloads/). The Postman web client doesn't support requests to `localhost`. +> Since the service returns responses as SSEs, a REST client like Postman is recommended. Download it [here](https://www.postman.com/downloads/). The Postman web client doesn't support requests to `localhost`. [![Run in Postman](https://run.pstmn.io/button.svg)](https://app.getpostman.com/run-collection/18073744-276b793e-f5ec-418f-ba0a-9dff94af543e?action=collection%2Ffork&source=rip_markdown&collection-url=entityId%3D18073744-276b793e-f5ec-418f-ba0a-9dff94af543e%26entityType%3Dcollection%26workspaceId%3D8d8a1363-ad0a-45ad-b036-ef6a37e44ef8) -### 1. `POST /embed` -To generate and store embeddings for a GitHub repository. +| Endpoint | Method | Description | +|----------------------|--------|-----------------------------------------------| +| `/` | GET | Redirects to the configured [redirect URL](https://github.com/open-sauced/repo-query/blob/afc4d19068e7c84a2566dae9598f1500f1191705/src/constants.rs#L12). | +| `/embed` | POST | Generate and store embeddings for a GitHub repository. | +| `/query` | POST | Perform a query on the API with a specific question related to a repository. | +| `/collection` | GET | Check if a repository has been indexed. | + +### 1. `/embed` #### Parameters + The parameters are passed as a JSON object in the request body: - `owner` (string, required): The owner of the repository. @@ -40,9 +47,11 @@ The parameters are passed as a JSON object in the request body: - `branch` (string, required): The name of the branch. #### Response -The request is processed by the server and responses are sent as [Server-sent events(SSE)](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events). The event stream will contain the following events with optional data. https://github.com/open-sauced/repo-query/blob/f2f415a4fa9c02d4530624fd7bac2105eea1a77c/src/routes/events.rs#L14-L20 + +The request is processed by the server and responses are sent as [Server-sent events(SSE)](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events). The event stream will contain [events](https://github.com/open-sauced/repo-query/blob/afc4d19068e7c84a2566dae9598f1500f1191705/src/routes/events.rs#L14-L21) with optional data. #### Example + ```bash curl --location 'localhost:3000/embed' \ --header 'Content-Type: application/json' \ @@ -53,10 +62,10 @@ curl --location 'localhost:3000/embed' \ }' ``` -### 2. `POST /query` -To perform a query on the API with a specific question related to a repository. +### 2. `/query` #### Parameters + The parameters are passed as a JSON object in the request body: - `query` (string, required): The question or query you want to ask. @@ -66,10 +75,11 @@ The parameters are passed as a JSON object in the request body: - `branch` (string, required): The name of the branch. #### Response -The request is processed by the server and responses are sent as [Server-sent events(SSE)](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events). The event stream will contain the following events with optional data. https://github.com/open-sauced/repo-query/blob/f2f415a4fa9c02d4530624fd7bac2105eea1a77c/src/routes/events.rs#L22-L29 +The request is processed by the server and responses are sent as [Server-sent events(SSE)](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events). The event stream will contain [events](https://github.com/open-sauced/repo-query/blob/afc4d19068e7c84a2566dae9598f1500f1191705/src/routes/events.rs#L23-L32) with optional data. #### Example + ```bash curl --location 'localhost:3000/query' \ --header 'Content-Type: application/json' \ @@ -83,18 +93,20 @@ curl --location 'localhost:3000/query' \ }' ``` -### 3. `GET /collection` -To check if a repository has been indexed. +### 3. `/collection` #### Parameters + - `owner` (string, required): The owner of the repository. - `name` (string, required): The name of the repository. - `branch` (string, required): The name of the branch. #### Response + This endpoint returns an `OK` status code if the repository has been indexed by the service. #### Example + ```bash curl --location 'localhost:3000/embed?owner=open-sauced&name=ai&branch=beta' ```