-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: add publication_index to improve publications query
- Loading branch information
Showing
15 changed files
with
723 additions
and
7 deletions.
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
# syntax=docker/dockerfile:1 | ||
|
||
# Two-stage build for the `sync-to-publication-index` command:
#   1. `builder`  compiles the binary in release mode inside a Rust toolchain image.
#   2. `exporter` is an empty (`scratch`) image holding only the compiled binary under /cmd,
#      presumably so the build driver can export it to the host (e.g. `--output`) - confirm
#      against the Makefile's `build-cmd` target.
# NOTE(review): the base image is the amd64 variant while `--platform` follows $BUILDPLATFORM;
# confirm this combination behaves as intended on non-amd64 build hosts.
FROM --platform=$BUILDPLATFORM amd64/rust:slim-bookworm AS builder | ||
# Native toolchain and OpenSSL headers installed for the cargo build below.
RUN apt-get update && apt-get install -y clang lld cmake gcc g++ libc6-dev pkg-config libssl-dev curl openssl | ||
|
||
WORKDIR /src | ||
# Copy the workspace sources: root crate (`src`), commands (`cmd`), local crates and manifests.
COPY src ./src | ||
COPY cmd ./cmd | ||
COPY crates ./crates | ||
COPY Cargo.toml Cargo.lock ./ | ||
# Build only the `sync-to-publication-index` package, in release mode.
RUN cargo build --release -p sync-to-publication-index | ||
# Lists the release output directory in the build log (looks like a debugging aid).
RUN ls target/release | ||
|
||
# Final stage: starts from an empty image and contains nothing but the binary.
FROM scratch AS exporter | ||
WORKDIR /cmd | ||
COPY --from=builder /src/target/release/sync-to-publication-index ./ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,8 @@ | ||
# Writing service | ||
# Writing service | ||
|
||
## 生成数据库脚本的 Linux 可执行文件 | ||
|
||
```sh | ||
make build-cmd | ||
``` | ||
生成的可执行文件位于 `./target/cmd/sync-to-publication-index`。 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
# Manifest for the `sync-to-publication-index` command-line binary.
[package] | ||
name = "sync-to-publication-index" | ||
version = "0.1.0" | ||
edition = "2021" | ||
|
||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html | ||
|
||
[dependencies] | ||
# Local path dependencies: the in-repo ORM crate and the crate two directories up
# (`../../`, the `writing` package).
scylla-orm = { path = "../../crates/scylla-orm" } | ||
writing = { path = "../../" } | ||
# Entries marked `workspace = true` inherit their version/features from the workspace manifest.
anyhow = { workspace = true } | ||
axum = { workspace = true } | ||
async-trait = { workspace = true } | ||
bytes = { workspace = true } | ||
base64 = { workspace = true } | ||
ciborium = { workspace = true } | ||
ciborium-io = { workspace = true } | ||
log = { workspace = true } | ||
scylla = { workspace = true } | ||
serde = { workspace = true } | ||
serde_json = { workspace = true } | ||
structured-logger = { workspace = true } | ||
tokio = { workspace = true } | ||
xid = { workspace = true } | ||
# Declared here with an explicit version instead of being inherited from the workspace.
futures = "0.3" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,82 @@ | ||
use futures::stream::StreamExt; | ||
use scylla_orm::{ColumnsMap, CqlValue, ToCqlVal}; | ||
use structured_logger::{async_json::new_writer, Builder}; | ||
use tokio::io; | ||
use writing::{conf, db}; | ||
|
||
#[tokio::main(flavor = "multi_thread", worker_threads = 2)] | ||
async fn main() -> anyhow::Result<()> { | ||
Builder::with_level("debug") | ||
.with_target_writer("*", new_writer(io::stdout())) | ||
.init(); | ||
|
||
let nodes = std::env::var("SCYLLA_NODES").unwrap_or_else(|_| "127.0.0.1:9042".into()); | ||
|
||
let cfg = conf::ScyllaDB { | ||
nodes: nodes.split(',').map(|s| s.to_string()).collect(), | ||
username: "".to_string(), | ||
password: "".to_string(), | ||
}; | ||
|
||
let sess = db::scylladb::ScyllaDB::new(cfg, "writing").await?; | ||
let publication_fields = vec![ | ||
"gid".to_string(), | ||
"cid".to_string(), | ||
"language".to_string(), | ||
"version".to_string(), | ||
"status".to_string(), | ||
"model".to_string(), | ||
"from_language".to_string(), | ||
]; | ||
let query = format!("SELECT {} FROM publication", publication_fields.join(",")); | ||
let mut stream = sess.stream(query, ()).await?; | ||
let mut total: usize = 0; | ||
let mut fixed: usize = 0; | ||
let mut synced: usize = 0; | ||
|
||
let update_mode_query = | ||
"UPDATE publication SET model=? WHERE gid=? AND cid=? AND language=? AND version=?"; | ||
while let Some(row) = stream.next().await { | ||
let mut cols = ColumnsMap::with_capacity(publication_fields.len()); | ||
cols.fill(row?, &publication_fields)?; | ||
let mut doc = db::Publication::default(); | ||
doc.fill(&cols); | ||
total += 1; | ||
if doc.model != "gpt-3.5" && doc.model != "gpt-4" { | ||
let params = ( | ||
"gpt-3.5", | ||
doc.gid.to_cql(), | ||
doc.cid.to_cql(), | ||
doc.language.to_cql(), | ||
doc.version, | ||
); | ||
sess.execute(update_mode_query, params).await?; | ||
fixed += 1; | ||
} | ||
|
||
if doc.status == 2 { | ||
let mut idoc = db::PublicationIndex { | ||
cid: doc.cid, | ||
language: doc.language, | ||
original: doc.language == doc.from_language, | ||
version: doc.version, | ||
gid: doc.gid, | ||
..Default::default() | ||
}; | ||
let res = idoc.upsert(&sess).await?; | ||
if res { | ||
synced += 1; | ||
println!( | ||
"doc: {} {} {}", | ||
idoc.cid.to_string(), | ||
idoc.language.to_string(), | ||
idoc.version | ||
); | ||
} | ||
} | ||
} | ||
|
||
println!("total: {}, fixed: {}, synced: {}", total, fixed, synced); | ||
|
||
Ok(()) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.