Merge branch 'release/3.1.0'
sebosp committed Jun 16, 2024
2 parents babc581 + 5525778 commit 97720e1
Showing 15 changed files with 4,456 additions and 63 deletions.
11 changes: 7 additions & 4 deletions Cargo.toml
@@ -1,7 +1,7 @@
[package]
name = "s2protocol"
description = "A parser for Starcraft II - Replay format, exports to different target formats"
version = "3.0.4"
version = "3.1.0"
authors = ["Seb Ospina <[email protected]>"]
edition = "2021"
readme = "README.md"
@@ -26,15 +26,18 @@ arrow2_convert = { version = "0.5.0", optional = true }
sha256 = { version = "1.4.0", optional = true, default-features = false }
rayon = { version = "1.7.0", optional = true }
chrono = { version = "0.4.31", features = ["serde"] }
libc = "0.2"
bat = { version = "0.24.0", optional = true }

[features]
default = ["tracing_info_level", "arrow"]
# A feature that allows extra tracing, disable by default because of performance
default = ["tracing_off", "arrow", "syntax"]
# A feature that allows extra tracing, disabled by default because of performance
tracing_off = ["tracing/max_level_off", "tracing/release_max_level_off"]
tracing_trace_level = ["tracing/max_level_trace", "tracing/release_max_level_trace"]
tracing_debug_level = ["tracing/max_level_debug", "tracing/release_max_level_debug"]
tracing_info_level = ["tracing/max_level_info", "tracing/release_max_level_info"]
arrow = ["arrow2", "arrow2_convert", "sha256", "rayon"]

syntax = ["bat"]

[dev-dependencies]
test-log = { version = "0.2", default-features = false, features = ["trace"] }
42 changes: 21 additions & 21 deletions README.md
@@ -15,8 +15,7 @@ information it packs.

From the available data, analytics, visualizations and generative art can be created, for example
by using
- jupyter notebook in [s2-polars-data-analysis](https://github.com/sebosp/s2-polars-data-analysis)
- [rerun](https://github.com/rerun-io/rerun) : See the repo [swarmy](https://github.com/sebosp/swarmy)
- jupyter notebook in [s2-polars-data-analysis](https://github.com/sebosp/s2-polars-data-analysis) - [rerun](https://github.com/rerun-io/rerun) : See the repo [swarmy](https://github.com/sebosp/swarmy)
- [yew](https://github.com/yewstack/yew): See the repo [cooper](https://github.com/sebosp/cooper)
- [eframe/egui](https://github.com/emilk/egui): See repo [eframe-sc2](https://github.com/sebosp/eframe-sc2)
- [bevyengine/bevy](https://github.com/bevyengine/bevy) can be used to see:
@@ -58,29 +57,30 @@ for (event, change_hint) in res.into_iter() {

In the ipcs/ directory, one .ipc file is created per implemented data type.
The `--source` is the directory that contains the replay directory (or a single file).
Multiple subdirectories are supported.
Files are processed using parallel operations.
For 3600 files (500 MBs) it takes 30 seconds to transform/split them. YMMV

This is behind a feature flag `arrow`.
For 17K replays (2.3 GBs) it takes 120 seconds to parse/transform/split them. YMMV; in this case only ~10K files had valid init data (i.e., they are from supported protocol versions).

```bash
$ mkdir ipcs/
$ cargo run --features arrow -r -- --source "/mnt/windows/Users/sebos/Documents/StarCraft II/Accounts/51504154/2-S2-1-8459957/Replays/Multiplayer/" --output ipcs/ write-arrow-ipc all
2023-10-04T18:53:36.030202Z INFO s2protocol::arrow: Processing Arrow write request
2023-10-04T18:53:36.441089Z INFO s2protocol::arrow: Found 3600 files
2023-10-04T18:53:36.441646Z INFO s2protocol::arrow: Processing InitData IPC write request
2023-10-04T18:53:38.515349Z INFO s2protocol::arrow: Loaded 3600 records
2023-10-04T18:53:38.575090Z INFO s2protocol::arrow: Processing Details IPC write request
2023-10-04T18:53:38.700572Z INFO s2protocol::arrow: Loaded 3600 records
2023-10-04T18:53:38.706659Z INFO s2protocol::arrow: Processing TrackerEvents IPC write request: Stats
2023-10-04T18:53:44.295524Z INFO s2protocol::arrow: Loaded 1722783 records
2023-10-04T18:53:44.515362Z INFO s2protocol::arrow: Processing TrackerEvents IPC write request: Upgrades
2023-10-04T18:53:49.963043Z INFO s2protocol::arrow: Loaded 292898 records
2023-10-04T18:53:50.036165Z INFO s2protocol::arrow: Processing TrackerEvents IPC write request: UnitBorn
2023-10-04T18:53:57.561082Z INFO s2protocol::arrow: Loaded 22754591 records
2023-10-04T18:54:00.502298Z INFO s2protocol::arrow: Processing TrackerEvents IPC write request: UnitDied
2023-10-04T18:54:07.387545Z INFO s2protocol::arrow: Loaded 16118808 records
Total time: 33.654286961s
$ cargo run -r -- -v error --timing --source /home/seb/SCReplaysOnNVMe --output /home/seb/git/s2protocol-rs/ipcs/ write-arrow-ipc --process-max-files 10000000 all
Located 17021 matching files by extension
10299 files have valid init data, processing...
Total time: 121.943999981s
$ du -sh ipcs
13G ipcs

$ ls -ltra ipcs
total 13123004
drwxr-xr-x 2 seb seb 4096 Jun 16 09:33 ./
drwxr-xr-x 11 seb seb 4096 Jun 16 22:09 ../
-rw-r--r-- 1 seb seb 81070479 Jun 16 22:10 init_data.ipc
-rw-r--r-- 1 seb seb 10789826 Jun 16 22:10 details.ipc
-rw-r--r-- 1 seb seb 843902480 Jun 16 22:10 stats.ipc
-rw-r--r-- 1 seb seb 67967152 Jun 16 22:10 upgrades.ipc
-rw-r--r-- 1 seb seb 5712488337 Jun 16 22:10 unit_born.ipc
-rw-r--r-- 1 seb seb 4652841877 Jun 16 22:11 unit_died.ipc
-rw-r--r-- 1 seb seb 2068859942 Jun 16 22:12 cmd.ipc
```
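
The resulting `.ipc` files can be read back with any Arrow implementation for downstream analysis. Below is a minimal sketch (not part of this crate), assuming the `arrow2` crate at ~0.17 with its `io_ipc` feature enabled, pointed at the `unit_born.ipc` file from the listing above:

```rust
use std::fs::File;

use arrow2::io::ipc::read::{read_file_metadata, FileReader};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Open one of the IPC files produced by `write-arrow-ipc all`.
    let mut file = File::open("ipcs/unit_born.ipc")?;
    let metadata = read_file_metadata(&mut file)?;
    println!("schema: {:?}", metadata.schema);

    // Stream the record batches and count rows without loading the
    // whole multi-GB file into memory at once.
    let reader = FileReader::new(file, metadata, None, None);
    let mut rows = 0usize;
    for chunk in reader {
        rows += chunk?.len();
    }
    println!("rows: {rows}");
    Ok(())
}
```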

### Jupyter Notebooks
54 changes: 52 additions & 2 deletions src/arrow/mod.rs
@@ -10,6 +10,7 @@ use init_data::InitData;
use rayon::prelude::*;

use crate::cli::get_matching_files;
use crate::game_events::GameEventIterator;
use crate::tracker_events::{self, TrackerEventIterator};
use crate::*;
use clap::Subcommand;
@@ -35,6 +36,8 @@ pub enum ArrowIpcTypes {
UnitDied,
/// Writes the [`crate::message_events::MessageEvent`] to an Arrow IPC file
MessageEvents,
/// Writes the [`crate::game_events::Cmd`] to an Arrow IPC file
Cmd,
/// Writes all the implemented flat row types to Arrow IPC files inside the output directory
All,
}
@@ -87,6 +90,13 @@ impl ArrowIpcTypes {
panic!("Invalid schema, expected struct");
}
}
Self::Cmd => {
if let DataType::Struct(fields) = game_events::CmdEventFlatRow::data_type() {
arrow2::datatypes::Schema::from(fields.clone())
} else {
panic!("Invalid schema, expected struct");
}
}
_ => unimplemented!(),
}
}
@@ -104,6 +114,8 @@ impl ArrowIpcTypes {
Self::Stats => self.handle_tracker_events(sources, output),
Self::Upgrades => self.handle_tracker_events(sources, output),
Self::UnitBorn => self.handle_tracker_events(sources, output),
Self::UnitDied => self.handle_tracker_events(sources, output),
Self::Cmd => self.handle_game_events(sources, output),
Self::All => {
if !output.is_dir() {
panic!("Output must be a directory for types 'all'");
@@ -125,6 +137,7 @@ impl ArrowIpcTypes {
.handle_tracker_events(sources.clone(), output.join("unit_born.ipc"))?;
Self::UnitDied
.handle_tracker_events(sources.clone(), output.join("unit_died.ipc"))?;
Self::Cmd.handle_game_events(sources.clone(), output.join("cmd.ipc"))?;
Ok(())
}
_ => todo!(),
@@ -148,7 +161,7 @@ impl ArrowIpcTypes {
Ok(())
}

/// Creates a new Arrow IPC file with the stats data
/// Creates a new Arrow IPC file with the tracker events data
/// This seems to be small enough to not need to be chunked and is done in parallel
/// This requires ~1.5 GB of RAM for 3600 files, so it may not be practical on a typical player's machine.
#[tracing::instrument(level = "debug")]
@@ -193,6 +206,38 @@ impl ArrowIpcTypes {
close_arrow_mutex_writer(writer)
}

/// Creates a new Arrow IPC file with the game events data
/// This requires ~1.5 GB of RAM for 3600 files, so it may not be practical on a typical player's machine.
#[tracing::instrument(level = "debug")]
pub fn handle_game_events(
&self,
sources: Vec<InitData>,
output: PathBuf,
) -> Result<(), Box<dyn std::error::Error>> {
tracing::info!("Processing GameEvents IPC write request: {:?}", self);
let writer = open_arrow_mutex_writer(output, self.schema())?;

// Process files in parallel; the internal iterators will contend for the writer lock.
let total_records = sources
.par_iter()
.filter_map(|source| {
let source = PathBuf::from(&source.file_name);
let details = crate::details::Details::try_from(source.clone()).ok()?;
let game_events = GameEventIterator::new(&source).ok()?;
let (res, batch_len): (Box<dyn Array>, usize) = match self {
Self::Cmd => {
let batch = game_events.collect_into_game_cmds_flat_rows(&details);
(batch.try_into_arrow().ok()?, batch.len())
}
e => unimplemented!("{:?}", e),
};
write_to_arrow_mutex_writer(&writer, res, batch_len)
})
.sum::<usize>();
tracing::info!("Loaded {} records", total_records);
close_arrow_mutex_writer(writer)
}
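
The `open_arrow_mutex_writer` / `write_to_arrow_mutex_writer` / `close_arrow_mutex_writer` helpers are defined outside this hunk; the pattern they implement is that each rayon worker does the expensive parsing and flattening outside the lock, and only the final append is serialized. An illustrative sketch of that pattern with a hypothetical `Sink` stand-in (not the crate's actual writer helpers):

```rust
use std::sync::Mutex;

use rayon::prelude::*;

// Hypothetical stand-in for the Arrow IPC writer guarded above.
struct Sink(Vec<u64>);

fn write_batches_in_parallel(batches: Vec<Vec<u64>>) -> usize {
    let sink = Mutex::new(Sink(Vec::new()));
    batches
        .par_iter()
        .map(|batch| {
            // Expensive per-file work (parsing, flattening) happens outside the lock...
            let prepared: Vec<u64> = batch.iter().map(|v| v * 2).collect();
            // ...and only the short final append contends for the mutex.
            let mut guard = sink.lock().expect("writer lock poisoned");
            guard.0.extend_from_slice(&prepared);
            prepared.len()
        })
        .sum()
}
```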

/// Creates a new Arrow IPC file with the details data
#[tracing::instrument(level = "debug")]
pub fn handle_details_ipc_cmd(
@@ -232,7 +277,7 @@ impl ArrowIpcTypes {
cmd.max_version
);
let sources = get_matching_files(source, cmd.scan_max_files, cmd.traverse_max_depth)?;
tracing::info!("Scanned {} files", sources.len());
println!("Located {} matching files by extension", sources.len());
let mut sources: Vec<InitData> = sources
.par_iter()
.filter_map(|source| crate::init_data::InitData::try_from(source.clone()).ok())
@@ -255,6 +300,11 @@ impl ArrowIpcTypes {
.collect();
if sources.is_empty() {
panic!("No files found");
} else {
println!(
"{} files have valid init data, processing...",
sources.len()
);
}
// Identify the shortest unique sha256 hash fragment.
let mut smallest_fragment = 1;
67 changes: 54 additions & 13 deletions src/cli.rs
@@ -1,6 +1,9 @@
#[cfg(feature = "arrow")]
use super::*;

#[cfg(feature = "syntax")]
use bat::{Input, PrettyPrinter};

use crate::game_events::iterator::GameEventIterator;
use crate::generator::proto_morphist::ProtoMorphist;
use crate::read_details;
@@ -41,8 +44,7 @@ enum Commands {
WriteArrowIpc(WriteArrowIpcProps),
}

// Create a subcommand that handles the max depth and max files to process

/// Create a subcommand that handles the max depth and max files to process
#[cfg(feature = "arrow")]
#[derive(Args, Debug)]
pub struct WriteArrowIpcProps {
@@ -88,6 +90,10 @@ struct Cli {
/// Show basic performance metrics
#[arg(short, long, default_value = "false")]
timing: bool,

/// Colorize the JSON output (pretty-printing requires the `syntax` feature)
#[arg(short, long, default_value = "false")]
color: bool,
}

/// Matches a list of files in case the cli.source param is a directory
@@ -132,6 +138,30 @@ pub fn get_matching_files(
}
}

/// Prints the JSON string either with bat's PrettyPrinter or as plain JSON
pub fn json_print(json_str: String, color: bool) {
if color {
#[cfg(feature = "syntax")]
{
PrettyPrinter::new()
.language("json")
.header(false)
.grid(false)
.line_numbers(false)
.input(Input::from_bytes(json_str.as_bytes()))
.print()
.unwrap();
println!(",");
}
#[cfg(not(feature = "syntax"))]
{
println!("{},", json_str);
}
} else {
println!("{},", json_str);
}
}

/// Handles the request from the CLI when used as a binary
pub fn process_cli_request() -> Result<(), Box<dyn std::error::Error>> {
let init_time = std::time::Instant::now();
@@ -143,12 +173,17 @@ pub fn process_cli_request() -> Result<(), Box<dyn std::error::Error>> {
"info" => tracing::Level::INFO,
"debug" => tracing::Level::DEBUG,
"trace" => tracing::Level::TRACE,
_ => tracing::Level::INFO,
_ => {
tracing::warn!("Invalid verbosity level, defaulting to INFO");
tracing::Level::INFO
}
};
let color = cli.color;
tracing_subscriber::fmt()
.with_max_level(level)
.with_env_filter("info")
.with_env_filter(level.to_string())
.init();
#[cfg(feature = "syntax")]
match &cli.command {
Commands::Generate => {
ProtoMorphist::gen(&cli.source, &cli.output.expect("Requires --output"))?;
@@ -169,7 +204,13 @@ pub fn process_cli_request() -> Result<(), Box<dyn std::error::Error>> {
};
for source in sources {
tracing::info!("Processing {:?}", source);
let file_contents = crate::read_file(&source).unwrap();
let file_contents = match crate::read_file(&source) {
Ok(res) => res,
Err(e) => {
tracing::error!("Error reading file: {:?}", e);
continue;
}
};
let (_input, mpq) = match parser::parse(&file_contents) {
Ok(res) => res,
Err(e) => {
@@ -188,7 +229,7 @@ pub fn process_cli_request() -> Result<(), Box<dyn std::error::Error>> {
let res = TrackerEventIterator::new(&source)?;
println!("[");
for evt in res.into_iter() {
println!("{},", serde_json::to_string(&evt)?);
json_print(serde_json::to_string(&evt).unwrap(), color);
}
println!("]");
}
@@ -197,32 +238,32 @@ pub fn process_cli_request() -> Result<(), Box<dyn std::error::Error>> {
let res = GameEventIterator::new(&source)?;
println!("[");
for evt in res.into_iter() {
println!("{},", serde_json::to_string(&evt)?);
json_print(serde_json::to_string(&evt).unwrap(), color);
}
println!("]");
}
ReadTypes::MessageEvents => {
let res = read_message_events(&source_path, &mpq, &file_contents)?;
println!("[");
for evt in res {
println!("{},", serde_json::to_string(&evt)?);
json_print(serde_json::to_string(&evt).unwrap(), color);
}
println!("]");
}
ReadTypes::Details => {
let res = read_details(&source_path, &mpq, &file_contents)?;
println!("{},", serde_json::to_string(&res)?);
let evt = read_details(&source_path, &mpq, &file_contents)?;
json_print(serde_json::to_string(&evt).unwrap(), color);
}
ReadTypes::InitData => {
let res = read_init_data(&source_path, &mpq, &file_contents)?;
println!("{},", serde_json::to_string(&res)?);
let evt = read_init_data(&source_path, &mpq, &file_contents)?;
json_print(serde_json::to_string(&evt).unwrap(), color);
}
ReadTypes::TransistEvents => {
tracing::info!("Transducing through both Game and Tracker Events");
println!("[");
let res = crate::state::SC2EventIterator::new(&source)?;
for evt in res.into_iter() {
println!("{},", serde_json::to_string(&evt)?);
json_print(serde_json::to_string(&evt).unwrap(), color);
}
println!("]");
}
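
Outside the CLI, the same transducing iterator can be consumed as a library. A minimal sketch, assuming `SC2EventIterator::new` takes a `&PathBuf` as in the arm above, that the yielded events are serde-serializable, and that the crate's error type converts into `Box<dyn std::error::Error>`:

```rust
use std::path::PathBuf;

use s2protocol::state::SC2EventIterator;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Hypothetical path; point this at a real .SC2Replay on disk.
    let source = PathBuf::from("Replays/Multiplayer/example.SC2Replay");
    let events = SC2EventIterator::new(&source)?;
    for event in events.into_iter() {
        println!("{}", serde_json::to_string(&event)?);
    }
    Ok(())
}
```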