Skip to content

Commit

Permalink
Performance tweaks (#31)
Browse files Browse the repository at this point in the history
* use Cow<'a, str> for return value of parse(...) instead of String

* more copy reduction

* more minor tweaks to reduce copying

* Thread SmallRng through functions to avoid repeated initialization

* better lifetime naming

* faster implementation of 'scramble'

* use Cow in implementation of 'fixed'

* add missing file

* tidy up code

* remove unnecessary call to to_string

* don't clone state.position every time row_parser::parse is called

* code style
  • Loading branch information
addrummond authored Aug 17, 2022
1 parent 7c96d9c commit fd073f0
Show file tree
Hide file tree
Showing 5 changed files with 219 additions and 112 deletions.
7 changes: 5 additions & 2 deletions src/file_reader.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use crate::parsers::rng;
use crate::parsers::row_parser;
use crate::parsers::state::State;
use crate::parsers::strategies::Strategies;
Expand All @@ -22,15 +23,17 @@ pub fn read(

let mut row_parser_state = State::new();

let mut rng = rng::get();

loop {
match reader.read_line(&mut line) {
Ok(bytes_read) => {
if bytes_read == 0 {
break;
}

line = line.to_string();
let transformed_row = row_parser::parse(&line, &mut row_parser_state, strategies);
let transformed_row =
row_parser::parse(&mut rng, &line, &mut row_parser_state, strategies);
file_writer.write_all(transformed_row.as_bytes())?;
line.clear();
}
Expand Down
1 change: 1 addition & 0 deletions src/parsers/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ pub mod copy_row;
pub mod create_row;
pub mod db_schema;
pub mod national_insurance_number;
pub mod rng;
pub mod row_parser;
pub mod sanitiser;
pub mod state;
Expand Down
6 changes: 6 additions & 0 deletions src/parsers/rng.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
use rand::rngs::SmallRng;
use rand::SeedableRng;

/// Creates a fresh `SmallRng` for callers to pass along by mutable reference.
///
/// The generator is seeded from the thread-local RNG returned by
/// `rand::thread_rng()`. If that seeding step reports an error, the generator
/// is built with `SmallRng::from_entropy()` instead.
pub fn get() -> SmallRng {
    // `from_rng` is fallible; handle the error arm explicitly rather than
    // unwrapping so that a usable generator is always returned.
    match SmallRng::from_rng(rand::thread_rng()) {
        Ok(seeded) => seeded,
        Err(_) => SmallRng::from_entropy(),
    }
}
77 changes: 52 additions & 25 deletions src/parsers/row_parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ use crate::parsers::transformer;
use crate::parsers::types;
use crate::parsers::types::Column;
use itertools::Itertools;
use rand::rngs::SmallRng;
use std::borrow::Cow;

#[derive(Debug, PartialEq)]
enum RowType {
Expand Down Expand Up @@ -38,19 +40,21 @@ fn row_type(line: &str, state: &Position) -> RowType {
}
}

pub fn parse(line: &str, state: &mut State, strategies: &Strategies) -> String {
pub fn parse<'line>(
rng: &mut SmallRng,
line: &'line str,
state: &mut State,
strategies: &Strategies,
) -> Cow<'line, str> {
let sanitised_line = sanitiser::trim(line);
match (
row_type(sanitised_line, &state.position),
state.position.clone(),
) {
match (row_type(sanitised_line, &state.position), &state.position) {
(RowType::CreateTableStart, _position) => {
let table_name = create_row::parse(sanitised_line);
state.update_position(Position::InCreateTable {
table_name,
types: Vec::new(),
});
line.to_string()
Cow::from(line)
}
(
RowType::CreateTableRow,
Expand All @@ -60,34 +64,40 @@ pub fn parse(line: &str, state: &mut State, strategies: &Strategies) -> String {
},
) => {
state.update_position(Position::InCreateTable {
table_name,
table_name: table_name.clone(),
types: add_create_table_row_to_types(sanitised_line, current_types.to_vec()),
});
line.to_string()
Cow::from(line)
}
(RowType::CreateTableEnd, _position) => {
state.update_position(Position::Normal);
line.to_string()
Cow::from(line)
}
(RowType::CopyBlockStart, _position) => {
let current_table = copy_row::parse(sanitised_line, strategies);
state.update_position(Position::InCopy { current_table });
line.to_string()
Cow::from(line)
}
(RowType::CopyBlockEnd, _position) => {
state.update_position(Position::Normal);
line.to_string()
Cow::from(line)
}
(RowType::CopyBlockRow, Position::InCopy { current_table }) => {
(RowType::CopyBlockRow, Position::InCopy { ref current_table }) => {
let transformed = Cow::from(transform_row(
rng,
sanitised_line,
current_table,
&state.types,
));
state.update_position(Position::InCopy {
current_table: current_table.clone(),
});
transform_row(sanitised_line, &current_table, &state.types)
transformed
}

(RowType::Normal, Position::Normal) => {
state.update_position(Position::Normal);
line.to_string()
Cow::from(line)
}
(row_type, position) => {
panic!(
Expand All @@ -98,7 +108,12 @@ pub fn parse(line: &str, state: &mut State, strategies: &Strategies) -> String {
}
}

fn transform_row(line: &str, current_table: &CurrentTableTransforms, types: &Types) -> String {
fn transform_row(
rng: &mut SmallRng,
line: &str,
current_table: &CurrentTableTransforms,
types: &Types,
) -> String {
let column_values = split_row(line);

let mut transformed = column_values.enumerate().map(|(i, value)| {
Expand All @@ -115,6 +130,7 @@ fn transform_row(line: &str, current_table: &CurrentTableTransforms, types: &Typ
});

transformer::transform(
rng,
value,
column_type,
&current_column.transformer,
Expand Down Expand Up @@ -143,6 +159,7 @@ fn split_row(line: &str) -> std::str::Split<char> {
#[cfg(test)]
mod tests {
use super::*;
use crate::parsers::rng;
use crate::parsers::strategy_structs::{ColumnInfo, DataCategory, TransformerType};
use crate::parsers::types::{SubType, Type};
use std::collections::HashMap;
Expand All @@ -153,7 +170,8 @@ mod tests {
let strategies = Strategies::new_from("public.users".to_string(), HashMap::from([]));

let mut state = State::new();
let transformed_row = parse(create_table_row, &mut state, &strategies);
let mut rng = rng::get();
let transformed_row = parse(&mut rng, create_table_row, &mut state, &strategies);
assert_eq!(
state.position,
Position::InCreateTable {
Expand All @@ -170,7 +188,8 @@ mod tests {
let strategies = Strategies::new_from("public.users".to_string(), HashMap::from([]));

let mut state = State::new();
let transformed_row = parse(create_table_row, &mut state, &strategies);
let mut rng = rng::get();
let transformed_row = parse(&mut rng, create_table_row, &mut state, &strategies);
assert_eq!(
state.position,
Position::InCreateTable {
Expand All @@ -196,7 +215,8 @@ mod tests {
},
types: Types::new(HashMap::default()),
};
let transformed_row = parse(create_table_row, &mut state, &strategies);
let mut rng = rng::get();
let transformed_row = parse(&mut rng, create_table_row, &mut state, &strategies);

assert_eq!(
state.position,
Expand Down Expand Up @@ -229,7 +249,8 @@ mod tests {
},
types: Types::new(HashMap::default()),
};
let transformed_row = parse(create_table_row, &mut state, &strategies);
let mut rng = rng::get();
let transformed_row = parse(&mut rng, create_table_row, &mut state, &strategies);

assert_eq!(
state.position,
Expand All @@ -256,7 +277,8 @@ mod tests {
},
types: Types::new(HashMap::default()),
};
let transformed_row = parse(create_table_row, &mut state, &strategies);
let mut rng = rng::get();
let transformed_row = parse(&mut rng, create_table_row, &mut state, &strategies);

assert_eq!(state.position, Position::Normal);

Expand Down Expand Up @@ -293,7 +315,8 @@ mod tests {
let strategies = Strategies::new_from("public.users".to_string(), column_infos);

let mut state = State::new();
let transformed_row = parse(copy_row, &mut state, &strategies);
let mut rng = rng::get();
let transformed_row = parse(&mut rng, copy_row, &mut state, &strategies);

assert_eq!(copy_row, transformed_row);

Expand Down Expand Up @@ -338,7 +361,8 @@ mod tests {
let strategies = Strategies::new_from("public.users".to_string(), transforms);

let mut state = State::new();
let transformed_row = parse(end_copy_row, &mut state, &strategies);
let mut rng = rng::get();
let transformed_row = parse(&mut rng, end_copy_row, &mut state, &strategies);
assert!(state.position == Position::Normal);
assert_eq!(end_copy_row, transformed_row);
}
Expand All @@ -349,7 +373,8 @@ mod tests {
let strategies = Strategies::new_from("public.users".to_string(), HashMap::new());

let mut state = State::new();
let transformed_row = parse(non_table_data_row, &mut state, &strategies);
let mut rng = rng::get();
let transformed_row = parse(&mut rng, non_table_data_row, &mut state, &strategies);
assert!(state.position == Position::Normal);
assert_eq!(non_table_data_row, transformed_row);
}
Expand Down Expand Up @@ -394,7 +419,8 @@ mod tests {
.add_type("public.users", "column_3", SubType::Character)
.build(),
};
let transformed_row = parse(table_data_row, &mut state, &strategies);
let mut rng = rng::get();
let transformed_row = parse(&mut rng, table_data_row, &mut state, &strategies);
assert_eq!("first\tsecond\tthird\n", transformed_row);
}

Expand All @@ -417,7 +443,8 @@ mod tests {
.add_array_type("public.users", "column_1", SubType::Character)
.build(),
};
let processed_row = parse(table_data_row, &mut state, &strategies);
let mut rng = rng::get();
let processed_row = parse(&mut rng, table_data_row, &mut state, &strategies);
assert!(table_data_row != processed_row);
}
}
Loading

0 comments on commit fd073f0

Please sign in to comment.