Skip to content

Commit

Permalink
Tokenize build with Redis
Browse files Browse the repository at this point in the history
  • Loading branch information
brianreicher committed Oct 9, 2023
1 parent 91b5bdf commit 248061f
Show file tree
Hide file tree
Showing 13 changed files with 121 additions and 139 deletions.
6 changes: 6 additions & 0 deletions .metals/metals.lock.db
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#FileLock
#Mon Oct 09 13:58:34 EDT 2023
hostName=localhost
id=18b159714afc7992c11ec7d9147ac2956796e2e5dc2
method=file
server=localhost\:55612
1 change: 1 addition & 0 deletions .scala-build/.bloop/project_bd2c96d2de-test.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"version":"1.4.0","project":{"name":"project_bd2c96d2de-test","directory":"/Users/brianreicher/Documents/GitHub/turbine/.scala-build","workspaceDir":"/Users/brianreicher/Documents/GitHub/turbine","sources":[],"dependencies":["project_bd2c96d2de"],"classpath":["/Users/brianreicher/Library/Caches/Coursier/v1/https/repo1.maven.org/maven2/org/scala-lang/scala3-library_3/3.2.2/scala3-library_3-3.2.2.jar","/Users/brianreicher/Library/Caches/Coursier/v1/https/repo1.maven.org/maven2/org/scala-lang/scala-library/2.13.10/scala-library-2.13.10.jar","/Users/brianreicher/Documents/GitHub/turbine/.scala-build/project_bd2c96d2de/classes/main","/Users/brianreicher/Library/Caches/Coursier/v1/https/repo1.maven.org/maven2/com/sourcegraph/semanticdb-javac/0.7.4/semanticdb-javac-0.7.4.jar"],"out":"/Users/brianreicher/Documents/GitHub/turbine/.scala-build/.bloop/project_bd2c96d2de-test","classesDir":"/Users/brianreicher/Documents/GitHub/turbine/.scala-build/project_bd2c96d2de/classes/test","scala":{"organization":"org.scala-lang","name":"scala-compiler","version":"3.2.2","options":["-Xsemanticdb","-sourceroot","/Users/brianreicher/Documents/GitHub/turbine","-release","8"],"jars":["/Users/brianreicher/Library/Caches/Coursier/v1/https/repo1.maven.org/maven2/org/scala-lang/scala3-compiler_3/3.2.2/scala3-compiler_3-3.2.2.jar","/Users/brianreicher/Library/Caches/Coursier/v1/https/repo1.maven.org/maven2/org/scala-lang/scala3-interfaces/3.2.2/scala3-interfaces-3.2.2.jar","/Users/brianreicher/Library/Caches/Coursier/v1/https/repo1.maven.org/maven2/org/scala-lang/scala3-library_3/3.2.2/scala3-library_3-3.2.2.jar","/Users/brianreicher/Library/Caches/Coursier/v1/https/repo1.maven.org/maven2/org/scala-lang/tasty-core_3/3.2.2/tasty-core_3-3.2.2.jar","/Users/brianreicher/Library/Caches/Coursier/v1/https/repo1.maven.org/maven2/org/scala-lang/modules/scala-asm/9.3.0-scala-1/scala-asm-9.3.0-scala-1.jar","/Users/brianreicher/Library/Caches/Coursier/v1/https/repo1.maven.org/maven2/org/scala-sbt/compiler-i
nterface/1.3.5/compiler-interface-1.3.5.jar","/Users/brianreicher/Library/Caches/Coursier/v1/https/repo1.maven.org/maven2/org/jline/jline-reader/3.19.0/jline-reader-3.19.0.jar","/Users/brianreicher/Library/Caches/Coursier/v1/https/repo1.maven.org/maven2/org/jline/jline-terminal/3.19.0/jline-terminal-3.19.0.jar","/Users/brianreicher/Library/Caches/Coursier/v1/https/repo1.maven.org/maven2/org/jline/jline-terminal-jna/3.19.0/jline-terminal-jna-3.19.0.jar","/Users/brianreicher/Library/Caches/Coursier/v1/https/repo1.maven.org/maven2/org/scala-lang/scala-library/2.13.10/scala-library-2.13.10.jar","/Users/brianreicher/Library/Caches/Coursier/v1/https/repo1.maven.org/maven2/com/google/protobuf/protobuf-java/3.7.0/protobuf-java-3.7.0.jar","/Users/brianreicher/Library/Caches/Coursier/v1/https/repo1.maven.org/maven2/org/scala-sbt/util-interface/1.3.0/util-interface-1.3.0.jar","/Users/brianreicher/Library/Caches/Coursier/v1/https/repo1.maven.org/maven2/net/java/dev/jna/jna/5.3.1/jna-5.3.1.jar"]},"java":{"options":["--release","8","-Xplugin:semanticdb -sourceroot:/Users/brianreicher/Documents/GitHub/turbine 
-targetroot:javac-classes-directory","-J--add-exports","-Jjdk.compiler/com.sun.tools.javac.api=ALL-UNNAMED","-J--add-exports","-Jjdk.compiler/com.sun.tools.javac.code=ALL-UNNAMED","-J--add-exports","-Jjdk.compiler/com.sun.tools.javac.model=ALL-UNNAMED","-J--add-exports","-Jjdk.compiler/com.sun.tools.javac.tree=ALL-UNNAMED","-J--add-exports","-Jjdk.compiler/com.sun.tools.javac.util=ALL-UNNAMED"]},"test":{"frameworks":[{"names":["com.novocode.junit.JUnitFramework"]},{"names":["org.scalatest.tools.Framework","org.scalatest.tools.ScalaTestFramework"]},{"names":["org.scalacheck.ScalaCheckFramework"]},{"names":["org.specs.runner.SpecsFramework","org.specs2.runner.Specs2Framework","org.specs2.runner.SpecsFramework"]},{"names":["utest.runner.Framework"]},{"names":["munit.Framework"]}],"options":{"excludes":[],"arguments":[]}},"platform":{"name":"jvm","config":{"options":[]},"mainClass":[]},"resolution":{"modules":[{"organization":"org.scala-lang","name":"scala3-library_3","version":"3.2.2","artifacts":[{"name":"scala3-library_3","path":"/Users/brianreicher/Library/Caches/Coursier/v1/https/repo1.maven.org/maven2/org/scala-lang/scala3-library_3/3.2.2/scala3-library_3-3.2.2.jar"},{"name":"scala3-library_3","classifier":"sources","path":"/Users/brianreicher/Library/Caches/Coursier/v1/https/repo1.maven.org/maven2/org/scala-lang/scala3-library_3/3.2.2/scala3-library_3-3.2.2-sources.jar"}]},{"organization":"org.scala-lang","name":"scala-library","version":"2.13.10","artifacts":[{"name":"scala-library","path":"/Users/brianreicher/Library/Caches/Coursier/v1/https/repo1.maven.org/maven2/org/scala-lang/scala-library/2.13.10/scala-library-2.13.10.jar"},{"name":"scala-library","classifier":"sources","path":"/Users/brianreicher/Library/Caches/Coursier/v1/https/repo1.maven.org/maven2/org/scala-lang/scala-library/2.13.10/scala-library-2.13.10-sources.jar"}]}]},"tags":["test"]}}
1 change: 1 addition & 0 deletions .scala-build/.bloop/project_bd2c96d2de.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"version":"1.4.0","project":{"name":"project_bd2c96d2de","directory":"/Users/brianreicher/Documents/GitHub/turbine/.scala-build","workspaceDir":"/Users/brianreicher/Documents/GitHub/turbine","sources":[],"dependencies":[],"classpath":["/Users/brianreicher/Library/Caches/Coursier/v1/https/repo1.maven.org/maven2/org/scala-lang/scala3-library_3/3.2.2/scala3-library_3-3.2.2.jar","/Users/brianreicher/Library/Caches/Coursier/v1/https/repo1.maven.org/maven2/org/scala-lang/scala-library/2.13.10/scala-library-2.13.10.jar","/Users/brianreicher/Library/Caches/Coursier/v1/https/repo1.maven.org/maven2/com/sourcegraph/semanticdb-javac/0.7.4/semanticdb-javac-0.7.4.jar"],"out":"/Users/brianreicher/Documents/GitHub/turbine/.scala-build/.bloop/project_bd2c96d2de","classesDir":"/Users/brianreicher/Documents/GitHub/turbine/.scala-build/project_bd2c96d2de/classes/main","scala":{"organization":"org.scala-lang","name":"scala-compiler","version":"3.2.2","options":["-Xsemanticdb","-sourceroot","/Users/brianreicher/Documents/GitHub/turbine","-release","8"],"jars":["/Users/brianreicher/Library/Caches/Coursier/v1/https/repo1.maven.org/maven2/org/scala-lang/scala3-compiler_3/3.2.2/scala3-compiler_3-3.2.2.jar","/Users/brianreicher/Library/Caches/Coursier/v1/https/repo1.maven.org/maven2/org/scala-lang/scala3-interfaces/3.2.2/scala3-interfaces-3.2.2.jar","/Users/brianreicher/Library/Caches/Coursier/v1/https/repo1.maven.org/maven2/org/scala-lang/scala3-library_3/3.2.2/scala3-library_3-3.2.2.jar","/Users/brianreicher/Library/Caches/Coursier/v1/https/repo1.maven.org/maven2/org/scala-lang/tasty-core_3/3.2.2/tasty-core_3-3.2.2.jar","/Users/brianreicher/Library/Caches/Coursier/v1/https/repo1.maven.org/maven2/org/scala-lang/modules/scala-asm/9.3.0-scala-1/scala-asm-9.3.0-scala-1.jar","/Users/brianreicher/Library/Caches/Coursier/v1/https/repo1.maven.org/maven2/org/scala-sbt/compiler-interface/1.3.5/compiler-interface-1.3.5.jar","/Users/brianreicher/Library/Caches/Coursier/v1/https/repo1.maven.org/maven2/
org/jline/jline-reader/3.19.0/jline-reader-3.19.0.jar","/Users/brianreicher/Library/Caches/Coursier/v1/https/repo1.maven.org/maven2/org/jline/jline-terminal/3.19.0/jline-terminal-3.19.0.jar","/Users/brianreicher/Library/Caches/Coursier/v1/https/repo1.maven.org/maven2/org/jline/jline-terminal-jna/3.19.0/jline-terminal-jna-3.19.0.jar","/Users/brianreicher/Library/Caches/Coursier/v1/https/repo1.maven.org/maven2/org/scala-lang/scala-library/2.13.10/scala-library-2.13.10.jar","/Users/brianreicher/Library/Caches/Coursier/v1/https/repo1.maven.org/maven2/com/google/protobuf/protobuf-java/3.7.0/protobuf-java-3.7.0.jar","/Users/brianreicher/Library/Caches/Coursier/v1/https/repo1.maven.org/maven2/org/scala-sbt/util-interface/1.3.0/util-interface-1.3.0.jar","/Users/brianreicher/Library/Caches/Coursier/v1/https/repo1.maven.org/maven2/net/java/dev/jna/jna/5.3.1/jna-5.3.1.jar"]},"java":{"options":["--release","8","-Xplugin:semanticdb -sourceroot:/Users/brianreicher/Documents/GitHub/turbine -targetroot:javac-classes-directory","-J--add-exports","-Jjdk.compiler/com.sun.tools.javac.api=ALL-UNNAMED","-J--add-exports","-Jjdk.compiler/com.sun.tools.javac.code=ALL-UNNAMED","-J--add-exports","-Jjdk.compiler/com.sun.tools.javac.model=ALL-UNNAMED","-J--add-exports","-Jjdk.compiler/com.sun.tools.javac.tree=ALL-UNNAMED","-J--add-exports","-Jjdk.compiler/com.sun.tools.javac.util=ALL-UNNAMED"]},"platform":{"name":"jvm","config":{"options":[]},"mainClass":[]},"resolution":{"modules":[{"organization":"org.scala-lang","name":"scala3-library_3","version":"3.2.2","artifacts":[{"name":"scala3-library_3","path":"/Users/brianreicher/Library/Caches/Coursier/v1/https/repo1.maven.org/maven2/org/scala-lang/scala3-library_3/3.2.2/scala3-library_3-3.2.2.jar"},{"name":"scala3-library_3","classifier":"sources","path":"/Users/brianreicher/Library/Caches/Coursier/v1/https/repo1.maven.org/maven2/org/scala-lang/scala3-library_3/3.2.2/scala3-library_3-3.2.2-sources.jar"}]},{"organization":"org.scala-lang","name":"s
cala-library","version":"2.13.10","artifacts":[{"name":"scala-library","path":"/Users/brianreicher/Library/Caches/Coursier/v1/https/repo1.maven.org/maven2/org/scala-lang/scala-library/2.13.10/scala-library-2.13.10.jar"},{"name":"scala-library","classifier":"sources","path":"/Users/brianreicher/Library/Caches/Coursier/v1/https/repo1.maven.org/maven2/org/scala-lang/scala-library/2.13.10/scala-library-2.13.10-sources.jar"}]}]},"tags":["library"]}}
2 changes: 0 additions & 2 deletions ingestion/.gitignore

This file was deleted.

25 changes: 0 additions & 25 deletions ingestion/build.sbt

This file was deleted.

5 changes: 0 additions & 5 deletions ingestion/project/Dependencies.scala

This file was deleted.

1 change: 0 additions & 1 deletion ingestion/project/build.properties

This file was deleted.

38 changes: 0 additions & 38 deletions ingestion/src/main/scala/driver/Driver.scala

This file was deleted.

10 changes: 0 additions & 10 deletions ingestion/src/main/scala/driver/FileFilter.scala

This file was deleted.

51 changes: 0 additions & 51 deletions ingestion/src/main/scala/driver/ProcessRepos.scala

This file was deleted.

7 changes: 0 additions & 7 deletions ingestion/src/test/scala/example/HelloSpec.scala

This file was deleted.

11 changes: 11 additions & 0 deletions tokenizer/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
[package]
name = "tokenizer"
version = "0.1.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
pinenut = "0.1.3"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
102 changes: 102 additions & 0 deletions tokenizer/src/main.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
use pinenut::{Client, Index, models::Vector};
use serde_json::{Value, from_str};


/// Driver that bundles a `pinenut` [`Client`] with an [`Index`] handle.
///
/// NOTE(review): the methods implemented for this type read `self.db_name`,
/// but no `db_name` field is declared here — confirm whether a
/// `db_name: String` field is missing.
/// NOTE(review): the type name looks like a misspelling of "Pinecone" — confirm
/// before other code starts depending on it.
pub struct PinceoneDriver {
// Client handle stored by `new`.
client: Client,
// Index handle stored by `new`, obtained from `client.index(..)`.
index: Index,
}

impl PinceoneDriver {
/// Builds a driver holding a client and an index handle.
///
/// The API key, environment and index name are read at compile time through
/// `env!` from `PINECONE_API_KEY`, `PINECONE_ENV` and `PINECONE_INDEX_NAME`.
/// `host` and `port` are only used to format the URI of the first client
/// binding, which is shadowed before it is used; `db_name` is not read at all.
///
/// # Panics
/// Panics when creating either client fails (`expect` / `unwrap`) or when
/// `index.describe()` returns an error (`unwrap`).
///
/// NOTE(review): this function is not declared `async` but its body uses
/// `.await`, which the compiler rejects — confirm whether the signature should
/// become `pub async fn new(..)`.
pub fn new(host: &str, port: u16, db_name: &str) -> Self {
// NOTE(review): this binding is replaced by the `Client::new` call below and
// never used; the `mongodb://` URI looks like a leftover from a MongoDB
// driver — confirm it can be removed.
let client = Client::with_uri_str(&format!("mongodb://{}:{}", host, port))
.expect("Failed to create PineconeDB client");
let client = Client::new(env!("PINECONE_API_KEY"), env!("PINECONE_ENV")).await.unwrap();
let mut index = client.index(env!("PINECONE_INDEX_NAME"));
// The describe result is discarded; the call only serves to fail early
// (via `unwrap`) if the index cannot be described.
let _ = index.describe().await.unwrap();

PinceoneDriver { client, index }
}

/// Empties `collection_name` by calling `delete_many` with an empty
/// `Document` as the filter, then prints the `deleted_count` reported back.
///
/// The collection is looked up in the database named by `self.db_name`.
///
/// # Errors
/// Propagates the error returned by the `delete_many` call.
pub async fn flush_collection(&self, collection_name: &str) -> Result<(), mongodb::error::Error> {
    let database = self.client.database(&self.db_name);
    let outcome = database
        .collection(collection_name)
        .delete_many(Document::new())
        .await?;
    println!("Deleted {} documents from {}", outcome.deleted_count, collection_name);
    Ok(())
}

/// Drops `collection_name` from the database named by `self.db_name` and
/// prints a confirmation line once the drop has succeeded.
///
/// # Errors
/// Propagates the error returned by the `drop` call.
pub async fn remove_collection(&self, collection_name: &str) -> Result<(), mongodb::error::Error> {
    let database = self.client.database(&self.db_name);
    let target = database.collection(collection_name);
    target.drop(None).await?;
    println!("Dropped {} from {}", collection_name, self.db_name);
    Ok(())
}

/// Creates `collection_name` in the database named by `self.db_name`, passing
/// no creation options, and prints a confirmation line on success.
///
/// # Errors
/// Propagates the error returned by the `create_collection` call.
pub async fn create_collection(&self, collection_name: &str) -> Result<(), mongodb::error::Error> {
    let database = self.client.database(&self.db_name);
    database.create_collection(collection_name, None).await?;
    println!("Created collection {} in {}", collection_name, self.db_name);
    Ok(())
}

/// Returns the value of `count_documents` for `collection_name`, called with
/// an empty `Document` filter and no options.
///
/// NOTE(review): the declared return type is `i64`; some driver versions
/// return an unsigned count from `count_documents` — confirm the types agree.
///
/// # Errors
/// Propagates the error returned by the `count_documents` call.
pub async fn collection_size(&self, collection_name: &str) -> Result<i64, mongodb::error::Error> {
    let database = self.client.database(&self.db_name);
    let handle = database.collection(collection_name);
    let empty_filter = Document::new();
    Ok(handle.count_documents(empty_filter, None).await?)
}

/// Loads a JSON file and inserts its objects into `collection_name`.
///
/// * `collection_name` – collection inside the database named by `self.db_name`.
/// * `json_file` – path of a file whose content must parse as a JSON array.
/// * `clear` – when `true`, `flush_collection` is run on the collection first.
///
/// Array elements that are not JSON objects are skipped; each object is
/// converted with `Document::try_from` and all of them are sent in a single
/// `insert_many` call. A summary line is printed afterwards.
///
/// # Errors
/// Propagates (through `?`) failures from flushing, reading the file, parsing
/// the JSON, converting an object to a `Document`, and the insert itself.
pub async fn insert_data(&self, collection_name: &str, json_file: &str, clear: bool) -> Result<(), mongodb::error::Error> {
    let collection = self.client.database(&self.db_name).collection(collection_name);

    if clear {
        self.flush_collection(collection_name).await?;
    }

    let file_contents = std::fs::read_to_string(json_file)?;
    let data: Vec<Value> = from_str(&file_contents)?;

    // At most one document per array element, so reserve that much up front.
    let mut documents = Vec::with_capacity(data.len());
    for entry in data {
        // Only JSON objects can be turned into documents; anything else is skipped.
        if let Value::Object(map) = entry {
            documents.push(Document::try_from(map)?);
        }
    }

    // `insert_many` takes the vector by value, so the count has to be captured
    // before the call; reading `documents.len()` afterwards is a use-after-move.
    let inserted = documents.len();
    collection.insert_many(documents, None).await?;
    println!("Inserted {} documents into {} in the {}", inserted, collection_name, self.db_name);
    Ok(())
}

/// Runs a `find` against `collection_name` and collects the documents it yields.
///
/// * `qu` – filter document passed to `find`.
/// * `proj` – projection document, supplied through the find options.
/// * `lim` – result limit, supplied through the find options.
/// * `show` – when `true`, every successfully read document is also printed
///   with `{:?}` before being collected.
///
/// Items the cursor yields as `Err` are written to stderr and skipped; they do
/// not abort the call.
///
/// # Errors
/// Returns the driver error if the `find` call itself fails.
pub async fn search_query(&self, collection_name: &str, qu: Document, proj: Document, lim: i64, show: bool) -> Result<Vec<Document>, mongodb::error::Error> {
    let collection = self.client.database(&self.db_name).collection(collection_name);

    // Projection and limit travel in the options argument of `find`, matching
    // the `(…, options)` call style used by the other methods of this type.
    let options = mongodb::options::FindOptions::builder()
        .projection(proj)
        .limit(lim)
        .build();
    // `find` is asynchronous and fallible, so it must be awaited to obtain the
    // cursor; the cursor is `mut` because advancing it needs `&mut self`.
    // NOTE(review): `next` is expected to come from a `StreamExt` trait, which
    // must be in scope at file level — confirm.
    let mut cursor = collection.find(qu, options).await?;

    let mut result = Vec::new();
    while let Some(doc) = cursor.next().await {
        match doc {
            Ok(document) => {
                // Printing is the only difference between the two modes.
                if show {
                    println!("{:?}", document);
                }
                result.push(document);
            }
            Err(e) => eprintln!("Error: {:?}", e),
        }
    }

    Ok(result)
}
}

0 comments on commit 248061f

Please sign in to comment.