diff --git a/tokenizer/src/lib.rs b/tokenizer/src/lib.rs index 9cb79c5..d04bd64 100644 --- a/tokenizer/src/lib.rs +++ b/tokenizer/src/lib.rs @@ -1,2 +1,2 @@ -pub mod oai; +pub mod tokenizer; pub mod pinecone_utils; \ No newline at end of file diff --git a/tokenizer/src/main.rs b/tokenizer/src/main.rs index c34aad4..21dbf2b 100644 --- a/tokenizer/src/main.rs +++ b/tokenizer/src/main.rs @@ -6,11 +6,11 @@ async fn main() -> std::io::Result<()> { mongo.connect().await?; // set collection to tokenize - let collection = "github_data"; - let oai_key = "generate_2023"; + let collection: &str = "github_data"; + let oai_key: &str = "generate_2023"; // TODO: create a new collection for each repo, insert documents into sub collections - let tokenizer: OpenAIClient = OpenAIClient::new(oai_key, mongo, collection); + let tokenizer: Tokenizer = Tokenizer::new(oai_key, mongo, collection); tokenizer.tokenize_collection(collection); Ok(()) diff --git a/tokenizer/src/oai.rs b/tokenizer/src/tokenizer.rs similarity index 95% rename from tokenizer/src/oai.rs rename to tokenizer/src/tokenizer.rs index 5267122..75e3f13 100644 --- a/tokenizer/src/oai.rs +++ b/tokenizer/src/tokenizer.rs @@ -5,13 +5,13 @@ use std::error::Error; use std::env; -pub struct OpenAIClient { +pub struct Tokenizer { oai_client: Client, mongo_model: &MongoDriver, collection: String, } -impl OpenAIClient { +impl Tokenizer { pub fn new(openai_api_key: &str, mongo_model: &MongoDriver, collection: &str) -> Self { OpenAIClient { @@ -35,7 +35,7 @@ impl OpenAIClient { let args: openai_rust::embeddings::EmbeddingsArguments = openai_rust::embeddings::EmbeddingsArguments::new("text-embedding-ada-002", text.to_owned()); let embedding: Vec = self.oai_client.create_embeddings(args).await.unwrap().data; - let update_doc = doc! { + let update_doc: Document = doc! { "$set": { "embedding": embedding } };