Skip to content

Commit

Permalink
feat: add publication_index to improve publications query
Browse files Browse the repository at this point in the history
  • Loading branch information
zensh committed Oct 2, 2023
1 parent 8aa0876 commit 20c0954
Show file tree
Hide file tree
Showing 15 changed files with 723 additions and 7 deletions.
25 changes: 24 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 5 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "writing"
version = "1.0.5"
version = "1.1.0"
edition = "2021"
rust-version = "1.64"
description = ""
Expand All @@ -10,7 +10,7 @@ repository = "https://github.com/yiwen-ai/writing"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[workspace]
members = ["crates/*"]
members = ["crates/*", "cmd/*"]

[workspace.dependencies]
anyhow = "1"
Expand Down Expand Up @@ -114,3 +114,6 @@ hex-literal = "0.4.1"

[profile.release]
lto = true

[target.x86_64-unknown-linux-gnu]
linker = "x86_64-unknown-linux-gnu-gcc"
16 changes: 16 additions & 0 deletions Dockerfile.cmd
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# syntax=docker/dockerfile:1

# Builder stage: compiles the sync-to-publication-index command in release mode.
# --platform=$BUILDPLATFORM makes this stage run on the build host's platform.
FROM --platform=$BUILDPLATFORM amd64/rust:slim-bookworm AS builder
# Native toolchain and OpenSSL headers installed before compiling the workspace.
RUN apt-get update && apt-get install -y clang lld cmake gcc g++ libc6-dev pkg-config libssl-dev curl openssl

WORKDIR /src
# Copy the root package sources, the cmd/ and crates/ workspace members, and the manifests.
COPY src ./src
COPY cmd ./cmd
COPY crates ./crates
COPY Cargo.toml Cargo.lock ./
# Build only the sync-to-publication-index package.
RUN cargo build --release -p sync-to-publication-index
# List the release directory so the produced artifacts show up in the build log.
RUN ls target/release

# Exporter stage: an empty image holding only the binary under /cmd, so that
# `docker build --output <dir>` writes it to <dir>/cmd/sync-to-publication-index.
FROM scratch AS exporter
WORKDIR /cmd
COPY --from=builder /src/target/release/sync-to-publication-index ./
3 changes: 3 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,6 @@ fix:

docker:
@docker build -t yiwen-ai/writing:latest .

# Build the command binary with Dockerfile.cmd and export the final stage's files into ./target.
build-cmd:
@docker build --output target -f Dockerfile.cmd .
9 changes: 8 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1 +1,8 @@
# Writing service
# Writing service

## 生成数据库脚本 Linux 执行文件

```sh
make build-cmd
```
生成文件位于 ./target/cmd/sync-to-publication-index
25 changes: 25 additions & 0 deletions cmd/sync-to-publication-index/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
[package]
name = "sync-to-publication-index"
version = "0.1.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
scylla-orm = { path = "../../crates/scylla-orm" }
writing = { path = "../../" }
anyhow = { workspace = true }
axum = { workspace = true }
async-trait = { workspace = true }
bytes = { workspace = true }
base64 = { workspace = true }
ciborium = { workspace = true }
ciborium-io = { workspace = true }
log = { workspace = true }
scylla = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
structured-logger = { workspace = true }
tokio = { workspace = true }
xid = { workspace = true }
futures = "0.3"
82 changes: 82 additions & 0 deletions cmd/sync-to-publication-index/src/main.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
use futures::stream::StreamExt;
use scylla_orm::{ColumnsMap, CqlValue, ToCqlVal};
use structured_logger::{async_json::new_writer, Builder};
use tokio::io;
use writing::{conf, db};

/// Entry point of the `sync-to-publication-index` maintenance command.
///
/// Streams every row of the `publication` table and, for each row:
/// - rewrites `model` to `"gpt-3.5"` when it is neither `"gpt-3.5"` nor `"gpt-4"`;
/// - upserts a [`db::PublicationIndex`] record when `status` is 2
///   (presumably the "published" status; confirm against the status definitions).
///
/// ScyllaDB nodes come from the comma-separated `SCYLLA_NODES` environment variable
/// (default `127.0.0.1:9042`); username and password are left empty.
///
/// # Errors
/// Stops at and returns the first error raised while connecting, streaming,
/// decoding a row, or writing to the database.
#[tokio::main(flavor = "multi_thread", worker_threads = 2)]
async fn main() -> anyhow::Result<()> {
    Builder::with_level("debug")
        .with_target_writer("*", new_writer(io::stdout()))
        .init();

    let node_list = std::env::var("SCYLLA_NODES").unwrap_or_else(|_| "127.0.0.1:9042".into());
    let sess = db::scylladb::ScyllaDB::new(
        conf::ScyllaDB {
            nodes: node_list.split(',').map(String::from).collect(),
            username: String::new(),
            password: String::new(),
        },
        "writing",
    )
    .await?;

    // Only the columns needed for the model fix and the index entry are selected.
    let publication_fields: Vec<String> = [
        "gid",
        "cid",
        "language",
        "version",
        "status",
        "model",
        "from_language",
    ]
    .into_iter()
    .map(String::from)
    .collect();

    let update_model_cql =
        "UPDATE publication SET model=? WHERE gid=? AND cid=? AND language=? AND version=?";
    let select_cql = format!("SELECT {} FROM publication", publication_fields.join(","));

    let mut scanned: usize = 0;
    let mut repaired: usize = 0;
    let mut indexed: usize = 0;

    let mut rows = sess.stream(select_cql, ()).await?;
    while let Some(row) = rows.next().await {
        let mut cols = ColumnsMap::with_capacity(publication_fields.len());
        cols.fill(row?, &publication_fields)?;
        let mut doc = db::Publication::default();
        doc.fill(&cols);
        scanned += 1;

        // Normalize any unexpected model name to "gpt-3.5".
        let model_is_known = doc.model == "gpt-3.5" || doc.model == "gpt-4";
        if !model_is_known {
            sess.execute(
                update_model_cql,
                (
                    "gpt-3.5",
                    doc.gid.to_cql(),
                    doc.cid.to_cql(),
                    doc.language.to_cql(),
                    doc.version,
                ),
            )
            .await?;
            repaired += 1;
        }

        // Only rows with status 2 get an index entry.
        if doc.status != 2 {
            continue;
        }

        let is_original = doc.language == doc.from_language;
        let mut entry = db::PublicationIndex {
            cid: doc.cid,
            language: doc.language,
            original: is_original,
            version: doc.version,
            gid: doc.gid,
            ..Default::default()
        };

        // `upsert` returns a flag; only rows for which it is true are counted and printed.
        if !entry.upsert(&sess).await? {
            continue;
        }
        indexed += 1;
        println!(
            "doc: {} {} {}",
            entry.cid.to_string(),
            entry.language.to_string(),
            entry.version
        );
    }

    println!("total: {}, fixed: {}, synced: {}", scanned, repaired, indexed);

    Ok(())
}
16 changes: 16 additions & 0 deletions cql/schema_table.cql
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,22 @@ CREATE INDEX publication_cid ON publication (cid);
CREATE INDEX publication_url ON publication (original_url);
CREATE INDEX publication_gid_status ON publication ((gid), status);

-- Index of published publications: one row per (cid, language) pair.
-- cid is the partition key and language is the clustering column.
CREATE TABLE IF NOT EXISTS publication_index (
cid BLOB, -- creation id
language TEXT, -- publication's language, ISO 639-3
original BOOLEAN, -- whether this is the original publication (the sync command sets it when language equals from_language)
version SMALLINT, -- creation version
gid BLOB, -- id of the group the publication belongs to
PRIMARY KEY (cid, language)
) WITH CLUSTERING ORDER BY (language ASC)
AND caching = {'enabled': 'true'}
AND comment = 'published publications index'
AND compaction = {'class': 'SizeTieredCompactionStrategy'}
AND compression = {'sstable_compression': 'LZ4Compressor'}
AND default_time_to_live = 0;

-- Secondary index so index rows can be queried by group id.
CREATE INDEX publication_index_gid ON publication_index (gid);

CREATE TABLE IF NOT EXISTS bookmark (
uid BLOB, -- user id who create the bookmark, 12 bytes XID
id BLOB, -- bookmark id, 12 bytes XID
Expand Down
12 changes: 12 additions & 0 deletions crates/scylla-orm/src/cql_value.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,18 @@ impl ToCqlVal for String {
}
}

// Decodes a CQL value into a Rust `bool` by delegating to the driver's conversion.
impl FromCqlVal for bool {
    fn from_cql(cql_val: &CqlValue) -> Result<Self, FromCqlValError> {
        // The driver's conversion takes its argument by value, so pass it an owned copy.
        let owned = cql_val.clone();
        cql_to_rust::FromCqlVal::from_cql(owned)
    }
}

// Encodes a Rust `bool` as a CQL boolean value.
impl ToCqlVal for bool {
    fn to_cql(&self) -> CqlValue {
        // `bool` is `Copy`, so dereferencing yields the value directly.
        CqlValue::Boolean(*self)
    }
}

impl FromCqlVal for i8 {
fn from_cql(cql_val: &CqlValue) -> Result<Self, FromCqlValError> {
cql_to_rust::FromCqlVal::from_cql(cql_val.to_owned())
Expand Down
Loading

0 comments on commit 20c0954

Please sign in to comment.