From 2220c9f9a0ede495cc26745cf84cc038a4716f65 Mon Sep 17 00:00:00 2001 From: James Date: Tue, 2 Jul 2024 14:37:29 -0400 Subject: [PATCH 1/8] table; *: rename input_parser -> table --- src/psc_bgk.cxx | 2 +- src/psc_bgk_util/bgk_params.hxx | 2 +- src/psc_bgk_util/{input_parser.hxx => table.hxx} | 0 3 files changed, 2 insertions(+), 2 deletions(-) rename src/psc_bgk_util/{input_parser.hxx => table.hxx} (100%) diff --git a/src/psc_bgk.cxx b/src/psc_bgk.cxx index e51a73388..2c1e1d23f 100644 --- a/src/psc_bgk.cxx +++ b/src/psc_bgk.cxx @@ -8,7 +8,7 @@ #include "psc_config.hxx" #include "psc_bgk_util/bgk_params.hxx" -#include "psc_bgk_util/input_parser.hxx" +#include "psc_bgk_util/table.hxx" #include "psc_bgk_util/params_parser.hxx" // ====================================================================== diff --git a/src/psc_bgk_util/bgk_params.hxx b/src/psc_bgk_util/bgk_params.hxx index f39d210a2..bc7224ed7 100644 --- a/src/psc_bgk_util/bgk_params.hxx +++ b/src/psc_bgk_util/bgk_params.hxx @@ -2,7 +2,7 @@ #include #include "params_parser.hxx" -#include "input_parser.hxx" +#include "table.hxx" // ====================================================================== // getCalculatedBoxSize diff --git a/src/psc_bgk_util/input_parser.hxx b/src/psc_bgk_util/table.hxx similarity index 100% rename from src/psc_bgk_util/input_parser.hxx rename to src/psc_bgk_util/table.hxx From bb4a759b216f27d5b1392e9f8925d4187beced40 Mon Sep 17 00:00:00 2001 From: James Date: Tue, 2 Jul 2024 14:39:50 -0400 Subject: [PATCH 2/8] table: use snake_case for functions --- src/psc_bgk_util/table.hxx | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/psc_bgk_util/table.hxx b/src/psc_bgk_util/table.hxx index 2cdd3f508..457315cd4 100644 --- a/src/psc_bgk_util/table.hxx +++ b/src/psc_bgk_util/table.hxx @@ -12,7 +12,7 @@ namespace parsing { -void assertFileOpen(const std::ifstream& file, const std::string file_path) +void assert_file_open(const std::ifstream& file, const std::string file_path) { if (!file.is_open()) { std::cout << "Failed to open input file: " << file_path << std::endl; @@ -22,7 +22,7 @@ void assertFileOpen(const std::ifstream& file, const std::string file_path) // from // https://stackoverflow.com/questions/6089231/getting-std-ifstream-to-handle-lf-cr-and-crlf -std::istream& safeGetline(std::istream& is, std::string& t) +std::istream& safe_getline(std::istream& is, std::string& t) { t.clear(); @@ -47,10 +47,10 @@ std::istream& safeGetline(std::istream& is, std::string& t) // from // https://stackoverflow.com/questions/3482064/counting-the-number-of-lines-in-a-text-file -int countLines(const std::string file_path) +int count_lines(const std::string file_path) { std::ifstream file(file_path); - assertFileOpen(file, file_path); + assert_file_open(file, file_path); file.unsetf(std::ios_base::skipws); unsigned newline_count = std::count(std::istream_iterator(file), @@ -99,22 +99,22 @@ public: ParsedData(const std::string file_path, int ncols, int indep_col, int lines_to_skip) - : nrows(parsing::countLines(file_path) - lines_to_skip), + : nrows(parsing::count_lines(file_path) - lines_to_skip), ncols(ncols), data(nrows * ncols), indep_col(indep_col) { - loadData(file_path, lines_to_skip); + load_data(file_path, lines_to_skip); } // ---------------------------------------------------------------------- - // loadData + // load_data // Parses all the data - void loadData(const std::string file_path, int lines_to_skip) + void load_data(const std::string file_path, int lines_to_skip) { std::ifstream file(file_path); - parsing::assertFileOpen(file, file_path); + parsing::assert_file_open(file, file_path); for (int i = 0; i < lines_to_skip; i++) file.ignore(512, '\n'); @@ -122,7 +122,7 @@ public: // iterate over each line int row = 0; - for (std::string line; !parsing::safeGetline(file, line).eof();) { + for (std::string line; !parsing::safe_getline(file, line).eof();) { if (row >= nrows) { std ::cout << "Error: too many rows. Expected " << nrows << ", got at least " << row + 1 << std::endl; From b813d57a79375ea1c35451153152dcc3f1c1f889 Mon Sep 17 00:00:00 2001 From: James Date: Tue, 2 Jul 2024 14:57:30 -0400 Subject: [PATCH 3/8] table: restore/add documentation to safe_getline --- src/psc_bgk_util/table.hxx | 42 ++++++++++++++++++++++++-------------- 1 file changed, 27 insertions(+), 15 deletions(-) diff --git a/src/psc_bgk_util/table.hxx b/src/psc_bgk_util/table.hxx index 457315cd4..91fd6b209 100644 --- a/src/psc_bgk_util/table.hxx +++ b/src/psc_bgk_util/table.hxx @@ -20,27 +20,39 @@ void assert_file_open(const std::ifstream& file, const std::string file_path) } } -// from -// https://stackoverflow.com/questions/6089231/getting-std-ifstream-to-handle-lf-cr-and-crlf -std::istream& safe_getline(std::istream& is, std::string& t) +// An implementation of `getline` that correctly handles any of the following +// line endings: "\\n", "\\r\\n", "\\r", eof. +// +// This is necessary because `std::istream.getline()` only handles the +// platform's line endings, and thus fails for files that were from a different +// platform. See +// https://stackoverflow.com/questions/6089231/getting-std-ifstream-to-handle-lf-cr-and-crlf. +std::istream& safe_getline(std::istream& stream, std::string& out_string) { - t.clear(); + out_string.clear(); + + // The characters in the stream are read one-by-one using a std::streambuf. + // That is faster than reading them one-by-one using the std::istream. + // Code that uses streambuf this way must be guarded by a sentry object. + // The sentry object performs various tasks, + // such as thread synchronization and updating the stream state. + + std::istream::sentry sentry(stream, true); - std::istream::sentry se(is, true); // this does stuff, apparently - std::streambuf* sb = is.rdbuf(); + std::streambuf* buffer = stream.rdbuf(); - for (;;) { - int c = sb->sbumpc(); + while (true) { + int c = buffer->sbumpc(); switch (c) { case '\r': - if (sb->sgetc() == '\n') - sb->sbumpc(); - case '\n': return is; + if (buffer->sgetc() == '\n') + buffer->sbumpc(); + case '\n': return stream; case std::streambuf::traits_type::eof(): - if (t.empty()) - is.setstate(std::ios::eofbit); - return is; - default: t += (char)c; + if (out_string.empty()) + stream.setstate(std::ios::eofbit); + return stream; + default: out_string += (char)c; } } } From 4163376d3af66e8c434fd1f09e88c16e2859a4be Mon Sep 17 00:00:00 2001 From: James Date: Wed, 3 Jul 2024 13:27:41 -0400 Subject: [PATCH 4/8] table: +Table --- src/psc_bgk_util/table.hxx | 144 +++++++++++++++++++++++++++++++++++++ 1 file changed, 144 insertions(+) diff --git a/src/psc_bgk_util/table.hxx b/src/psc_bgk_util/table.hxx index 91fd6b209..3cffb7fb8 100644 --- a/src/psc_bgk_util/table.hxx +++ b/src/psc_bgk_util/table.hxx @@ -1,5 +1,7 @@ #pragma once +#include +#include #include #include #include @@ -208,4 +210,146 @@ public: double* operator[](const int row) { return data.data() + row * ncols; } int get_nrows() const { return nrows; } +}; + +class Table +{ + std::unordered_map header; + std::vector> data; + + static std::unordered_map parse_header( + std::ifstream& line_stream) + { + std::unordered_map header; + + int n_cols = 0; + std::string header_line; + parsing::safe_getline(line_stream, header_line); + + std::istringstream word_stream(header_line); + for (std::string word; word_stream >> word;) { + header[word] = n_cols; + n_cols++; + } + + return header; + } + + static std::vector parse_data_row(std::istringstream& word_stream, + int expected_n_cols) + { + std::vector data_row(expected_n_cols); + for (std::string word; word_stream >> word;) { + data_row.push_back(std::stod(word)); + } + return data_row; + } + + static std::vector> parse_data(std::ifstream& line_stream, + int expected_n_cols) + { + std::vector> data; + + int row_number = 0; + for (std::string line; !parsing::safe_getline(line_stream, line).eof();) { + std::istringstream word_stream(line); + auto data_row = parse_data_row(word_stream, expected_n_cols); + + int n_cols = data_row.size(); + if (n_cols == 0) { + break; + } else if (n_cols != expected_n_cols) { + std::cout << "Error: bad column count. Expected " << expected_n_cols + << " columns, got " << n_cols << " on row " << row_number + << " of data." << std::endl; + exit(EXIT_FAILURE); + } + + data.push_back(data_row); + } + + return data; + } + +public: + // Parses the tab-separated value (tsv) file at the given path. + Table(const std::string path) + { + std::ifstream line_stream(path); + + header = parse_header(line_stream); + data = parse_data(line_stream, this->n_cols()); + } + + int n_rows() { return data.size(); } + int n_cols() { return header.size(); } + + double get(std::string column_name, int row) + { + int col = header[column_name]; + return data[row][col]; + } + + // Determines which row satisfies `col[row] <= val < col[row+1]`, where `col` + // is the column of data. The data is assumed to be strictly increasing. The + // returned row may be -1 or `n_rows`, indicating the given value is out of + // bounds. + int get_row(std::string column_name, double val) + { + int col = header[column_name]; + int min_row = 0; + int max_row = n_rows() - 1; + double delta = data[1][col] - data[0][col]; + + // initial guess; is precise when values are linearly spaced + int row = std::max(min_row, std::min(max_row, (int)(val / delta))); + + while (row < max_row && val > data[row][col]) { + row++; + } + + if (row == max_row && val > data[row][col]) { + return max_row + 1; + } + + while (row > min_row && val < data[row][col]) { + row--; + } + + if (row == min_row && val < data[row][col]) { + return min_row - 1; + } + + return row; + } + + // Gets the value of `column_name` corresponding to when `indep_column_name` + // equals `indep_val` via linear interpolation. The independent variable is + // assumed to be be strictly increasing. + double get_interpolated(std::string column_name, + std::string indep_column_name, double indep_val) + { + if (column_name == indep_column_name) { + return indep_val; + } + + int col = header[column_name]; + int indep_col = header[indep_column_name]; + + int row = get_row(indep_column_name, indep_val); + + if (row < 0 || row >= n_rows()) { + throw std::invalid_argument("value to interpolate on is out of bounds"); + } + + if (data[row][indep_col] == indep_val) { + return data[row][col]; + } + + // weights for linear interpolation + double w1 = data[row + 1][indep_col] - indep_val; + double w2 = indep_val - data[row][indep_col]; + + return (w1 * data[row][col] + w2 * data[row + 1][col]) / (w1 + w2); + } }; \ No newline at end of file From 2153547e786ee63887ac847a64e91e0bc9aaabee Mon Sep 17 00:00:00 2001 From: James Date: Wed, 3 Jul 2024 13:29:49 -0400 Subject: [PATCH 5/8] bgk: use Table instead of ParsedData --- src/psc_bgk.cxx | 22 +++++++++++----------- src/psc_bgk_util/bgk_params.hxx | 18 +++++++++--------- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/src/psc_bgk.cxx b/src/psc_bgk.cxx index 2c1e1d23f..fd4d41fe4 100644 --- a/src/psc_bgk.cxx +++ b/src/psc_bgk.cxx @@ -54,7 +54,8 @@ enum DATA_COL namespace { -ParsedData* parsedData; +// table of initial conditions +Table* ic_table; std::string read_checkpoint_filename; @@ -78,9 +79,8 @@ void setupParameters(int argc, char** argv) } std::string path_to_params(argv[1]); ParsedParams parsedParams(path_to_params); - parsedData = new ParsedData(parsedParams.get("path_to_data"), - n_cols, COL_RHO, 1); - g.loadParams(parsedParams, *parsedData); + ic_table = new Table(parsedParams.get("path_to_data")); + g.loadParams(parsedParams, *ic_table); psc_params.nmax = parsedParams.get("nmax"); psc_params.stats_every = parsedParams.get("stats_every"); @@ -204,7 +204,7 @@ inline double getIonDensity(double rho) { if (!g.do_ion) return g.n_i; - double potential = parsedData->get_interpolated(COL_PHI, rho); + double potential = ic_table->get_interpolated("Psi", "rho", rho); return std::exp(-potential / g.T_i); } @@ -360,7 +360,7 @@ void initializeParticles(Balance& balance, Grid_t*& grid_ptr, Mparticles& mprts, // other's moments are given in input file double psi_cs = - parsedData->get_interpolated(COL_PHI, rho) / sqr(g.beta); + ic_table->get_interpolated("Psi", "rho", rho) / sqr(g.beta); double n_background = exp(psi_cs); double p_background = n_background / np.n; np.p = pdist_case4(p_background, y, z, rho); @@ -369,12 +369,12 @@ void initializeParticles(Balance& balance, Grid_t*& grid_ptr, Mparticles& mprts, } if (rho == 0) { - double Te = parsedData->get_interpolated(COL_TE, rho); + double Te = ic_table->get_interpolated("Te", "rho", rho); np.p = setup_particles.createMaxwellian( {np.kind, np.n, {0, 0, 0}, {Te, Te, Te}, np.tag}); } else if (g.maxwellian) { - double Te = parsedData->get_interpolated(COL_TE, rho); - double vphi = parsedData->get_interpolated(COL_V_PHI, rho); + double Te = ic_table->get_interpolated("Te", "rho", rho); + double vphi = ic_table->get_interpolated("v_phi", "rho", rho); double coef = g.v_e_coef * (g.reverse_v ? -1 : 1) * (g.reverse_v_half && y < 0 ? -1 : 1); @@ -419,7 +419,7 @@ void initializePhi(BgkMfields& phi) setupScalarField( phi, Centering::Centerer(Centering::NC), [&](int m, double crd[3]) { double rho = sqrt(sqr(getCoord(crd[1])) + sqr(getCoord(crd[2]))); - return parsedData->get_interpolated(COL_PHI, rho); + return ic_table->get_interpolated("Psi", "rho", rho); }); writeMF(phi, "phi", {"phi"}); @@ -556,7 +556,7 @@ static void run(int argc, char** argv) read_checkpoint(read_checkpoint_filename, *grid_ptr, mprts, mflds); } - delete parsedData; + delete ic_table; // ---------------------------------------------------------------------- // Hand off to PscIntegrator to run the simulation diff --git a/src/psc_bgk_util/bgk_params.hxx b/src/psc_bgk_util/bgk_params.hxx index bc7224ed7..71ab27ac8 100644 --- a/src/psc_bgk_util/bgk_params.hxx +++ b/src/psc_bgk_util/bgk_params.hxx @@ -36,23 +36,23 @@ double getSpikeSize(double B, double k, double beta) // The hole size is determined empirically from the input data. This function // finds where |electron density - 1| <= epsilon, where epsilon is small. -double getHoleSize(ParsedData& data) +double getHoleSize(Table& ic_table) { double epsilon = 1e-4; const int COL_RHO = 0; const int COL_NE = 1; - for (int row = data.get_nrows() - 1; row >= 0; row--) { - if (std::abs(data[row][COL_NE] - 1) > epsilon) - return data[row][COL_RHO]; + for (int row = ic_table.n_rows() - 1; row >= 0; row--) { + if (std::abs(ic_table.get("ne", row) - 1) > epsilon) + return ic_table.get("rho", row); } throw "Unable to determine hole size."; } // Calculates a box size big enough to resolve the spike and the hole. -double getCalculatedBoxSize(double B, double k, double beta, ParsedData& data) +double getCalculatedBoxSize(double B, double k, double beta, Table& ic_table) { double spike_size = getSpikeSize(B, k, beta); - double hole_size = getHoleSize(data); + double hole_size = getHoleSize(ic_table); LOG_INFO("spike radius: %f\thole radius: %f\n", spike_size, hole_size); return 2 * std::max(spike_size, hole_size); } @@ -106,7 +106,7 @@ struct PscBgkParams double rel_box_size_3; // length of 3rd dimension in calculated units int n_patches_3; // number of patches in 3rd dimension - void loadParams(ParsedParams parsedParams, ParsedData& data) + void loadParams(ParsedParams parsedParams, Table& ic_table) { box_size = parsedParams.getAndWarnOrDefault("box_size", -1); rel_box_size = parsedParams.getOrDefault("rel_box_size", 1); @@ -162,8 +162,8 @@ struct PscBgkParams } if (box_size <= 0) - box_size = rel_box_size * getCalculatedBoxSize(Hx, k, beta, data); + box_size = rel_box_size * getCalculatedBoxSize(Hx, k, beta, ic_table); if (box_size_3 <= 0) - box_size_3 = rel_box_size_3 * getCalculatedBoxSize(Hx, k, beta, data); + box_size_3 = rel_box_size_3 * getCalculatedBoxSize(Hx, k, beta, ic_table); } }; From bdebeb3cbd5124df8e3b18a07e0b0b5771986ba0 Mon Sep 17 00:00:00 2001 From: James Date: Wed, 3 Jul 2024 13:30:07 -0400 Subject: [PATCH 6/8] table: -ParsedData --- src/psc_bgk_util/table.hxx | 137 ------------------------------------- 1 file changed, 137 deletions(-) diff --git a/src/psc_bgk_util/table.hxx b/src/psc_bgk_util/table.hxx index 3cffb7fb8..79cc8b144 100644 --- a/src/psc_bgk_util/table.hxx +++ b/src/psc_bgk_util/table.hxx @@ -75,143 +75,6 @@ int count_lines(const std::string file_path) } // namespace parsing -// ====================================================================== -// ParsedData -// Parses a space-separated list of values, such as a tsv file. -// Assuming there is a single independent variable, linearly interpolates other -// values. - -class ParsedData -{ -private: - const int nrows, ncols; - std::vector data; - const int indep_col; - double indep_var_step; - - // ---------------------------------------------------------------------- - // get_row - // Gets index of row containing greatest lower bound of given indep_var_val - - int get_row(double indep_var_val) - { - // initial guess; is precise when indep_var is linearly spaced - int row = std::min((int)(indep_var_val / indep_var_step), nrows - 1); - while (row < nrows - 1 && indep_var_val > (*this)[row][indep_col]) - row++; - if (indep_var_val > (*this)[row][indep_col]) { - return nrows; // tried to get a value beyond what's in input - } - while (row > 0 && indep_var_val < (*this)[row][indep_col]) - row--; - return row; - } - -public: - // ---------------------------------------------------------------------- - // ctor - - ParsedData(const std::string file_path, int ncols, int indep_col, - int lines_to_skip) - : nrows(parsing::count_lines(file_path) - lines_to_skip), - ncols(ncols), - data(nrows * ncols), - indep_col(indep_col) - { - load_data(file_path, lines_to_skip); - } - - // ---------------------------------------------------------------------- - // load_data - // Parses all the data - - void load_data(const std::string file_path, int lines_to_skip) - { - std::ifstream file(file_path); - parsing::assert_file_open(file, file_path); - - for (int i = 0; i < lines_to_skip; i++) - file.ignore(512, '\n'); - - // iterate over each line - int row = 0; - - for (std::string line; !parsing::safe_getline(file, line).eof();) { - if (row >= nrows) { - std ::cout << "Error: too many rows. Expected " << nrows - << ", got at least " << row + 1 << std::endl; - exit(EXIT_FAILURE); - } - - // iterate over each entry within a line - std::istringstream iss(line); - int col = 0; - - for (std::string result; iss >> result;) { - if (col >= ncols) { - std ::cout << "Error: too many columns. Expected " << ncols - << ", got at least " << col + 1 << " in line \"" << line - << "\"" << std::endl; - exit(EXIT_FAILURE); - } - - // write entry to data - (*this)[row][col] = std::stod(result); - col++; - } - if (col != ncols) { - std ::cout << "Error: not enough columns. Expected " << ncols - << ", got " << col << std::endl; - exit(EXIT_FAILURE); - } - row++; - } - if (row != nrows) { - std ::cout << "Error: not enough rows. Expected " << nrows << ", got " - << row << std::endl; - exit(EXIT_FAILURE); - } - file.close(); - - indep_var_step = - ((*this)[nrows - 1][indep_col] - (*this)[0][indep_col]) / nrows; - } - - // ---------------------------------------------------------------------- - // get_interpolated - // Calculates and returns a linearly interpolated value (specified by col) at - // given value of independent variable. If indep_var_val is too high, returns - // the last value available. - - double get_interpolated(int col, double indep_var_val) - { - if (indep_var_val < (*this)[0][indep_col]) { - mpi_printf(MPI_COMM_WORLD, "Out of bounds (too low): %f (< %f)\n", - indep_var_val, (*this)[0][indep_col]); - std::exit(1); - } - - int row = get_row(indep_var_val); - - if (row == nrows) { - return (*this)[nrows - 1][indep_col]; - } - - if ((*this)[row][indep_col] == indep_var_val) - return (*this)[row][col]; - - // weights for linear interpolation - double w1 = (*this)[row + 1][indep_col] - indep_var_val; - double w2 = indep_var_val - (*this)[row][indep_col]; - - return (w1 * (*this)[row][col] + w2 * (*this)[row + 1][col]) / (w1 + w2); - } - - double* operator[](const int row) { return data.data() + row * ncols; } - - int get_nrows() const { return nrows; } -}; - class Table { std::unordered_map header; From 8e5184b6d505e2feb31262cc0f1e5d8f2d30db7b Mon Sep 17 00:00:00 2001 From: James Date: Wed, 3 Jul 2024 14:41:43 -0400 Subject: [PATCH 7/8] table: fix initial data row capacity --- src/psc_bgk_util/table.hxx | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/psc_bgk_util/table.hxx b/src/psc_bgk_util/table.hxx index 79cc8b144..47da585bd 100644 --- a/src/psc_bgk_util/table.hxx +++ b/src/psc_bgk_util/table.hxx @@ -101,7 +101,8 @@ class Table static std::vector parse_data_row(std::istringstream& word_stream, int expected_n_cols) { - std::vector data_row(expected_n_cols); + std::vector data_row; + data_row.reserve(expected_n_cols); for (std::string word; word_stream >> word;) { data_row.push_back(std::stod(word)); } From 385751328881056b5130d189ba1c37b4a01a6923 Mon Sep 17 00:00:00 2001 From: James Date: Wed, 10 Jul 2024 14:28:07 -0400 Subject: [PATCH 8/8] bgk: -DATA_COL enum now completely useless --- src/psc_bgk.cxx | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/src/psc_bgk.cxx b/src/psc_bgk.cxx index fd4d41fe4..75d049287 100644 --- a/src/psc_bgk.cxx +++ b/src/psc_bgk.cxx @@ -37,18 +37,6 @@ using Checks = PscConfig::Checks; using Marder = PscConfig::Marder; using OutputParticles = PscConfig::OutputParticles; -// for parser -enum DATA_COL -{ - COL_RHO, - COL_NE, - COL_V_PHI, - COL_TE, - COL_E_RHO, - COL_PHI, - n_cols -}; - // ====================================================================== // Global parameters