diff --git a/.github/workflows/comment_bot.yml b/.github/workflows/comment_bot.yml index a23b94da08a56..610b76b0070c1 100644 --- a/.github/workflows/comment_bot.yml +++ b/.github/workflows/comment_bot.yml @@ -46,7 +46,7 @@ jobs: python-version: 3.8 - name: Install Archery and Crossbow dependencies run: pip install -e arrow/dev/archery[bot] - - name: Handle Github comment event + - name: Handle GitHub comment event env: ARROW_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} CROSSBOW_GITHUB_TOKEN: ${{ secrets.CROSSBOW_GITHUB_TOKEN }} diff --git a/.github/workflows/dev_pr/issue_check.js b/.github/workflows/dev_pr/issue_check.js index 75e86b40923c2..fb5d986dff2f7 100644 --- a/.github/workflows/dev_pr/issue_check.js +++ b/.github/workflows/dev_pr/issue_check.js @@ -103,7 +103,7 @@ async function commentNotStartedTicket(github, context, pullRequestNumber) { } /** - * Assigns the Github Issue to the PR creator. + * Assigns the GitHub Issue to the PR creator. * * @param {Object} github * @param {Object} context diff --git a/.github/workflows/issue_bot.yml b/.github/workflows/issue_bot.yml index 02379a379ffce..ec614ca1e7c56 100644 --- a/.github/workflows/issue_bot.yml +++ b/.github/workflows/issue_bot.yml @@ -64,7 +64,7 @@ jobs: "per_page": 100, }); - // this removes non-existent labels + // this removes nonexistent labels component_labels = component_labels.filter( label => repo_labels.data.some(repo_label => repo_label.name === label) ); diff --git a/CHANGELOG.md b/CHANGELOG.md index 4ecdf628355ea..6101f5d3cac25 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1459,7 +1459,7 @@ * [ARROW-12172](https://issues.apache.org/jira/browse/ARROW-12172) - [Python][Packaging] Pass python version as setuptools pretend version in the macOS wheel builds * [ARROW-12178](https://issues.apache.org/jira/browse/ARROW-12178) - [CI] Update setuptools in the ubuntu images * [ARROW-12186](https://issues.apache.org/jira/browse/ARROW-12186) - [Rust][DataFusion] Fix regexp_match test -* [ARROW-12209](https://issues.apache.org/jira/browse/ARROW-12209) - [JS] Copy all src files into the the TypeScript package +* [ARROW-12209](https://issues.apache.org/jira/browse/ARROW-12209) - [JS] Copy all src files into the TypeScript package * [ARROW-12220](https://issues.apache.org/jira/browse/ARROW-12220) - [C++][CI] Thread sanitizer failure * [ARROW-12226](https://issues.apache.org/jira/browse/ARROW-12226) - [C++] Fix Address Sanitizer failures * [ARROW-12227](https://issues.apache.org/jira/browse/ARROW-12227) - [R] Fix RE2 and median nightly build failures @@ -11430,7 +11430,7 @@ * [ARROW-67](https://issues.apache.org/jira/browse/ARROW-67) - C++ metadata flatbuffer serialization and data movement to memory maps * [ARROW-68](https://issues.apache.org/jira/browse/ARROW-68) - Better error handling for not fully setup systems * [ARROW-70](https://issues.apache.org/jira/browse/ARROW-70) - Add adapt 'lite' DCHECK macros from Kudu as also used in Parquet -* [ARROW-71](https://issues.apache.org/jira/browse/ARROW-71) - [C++] Add clang-tidy and clang-format to the the tool chain. +* [ARROW-71](https://issues.apache.org/jira/browse/ARROW-71) - [C++] Add clang-tidy and clang-format to the tool chain. 
+* [ARROW-71](https://issues.apache.org/jira/browse/ARROW-71) - [C++] Add clang-tidy and clang-format to the tool chain.
* [ARROW-73](https://issues.apache.org/jira/browse/ARROW-73) - Support older CMake versions * [ARROW-76](https://issues.apache.org/jira/browse/ARROW-76) - Revise format document to include null count, defer non-nullable arrays to the domain of metadata * [ARROW-78](https://issues.apache.org/jira/browse/ARROW-78) - C++: Add constructor for DecimalType diff --git a/c_glib/arrow-dataset-glib/dataset-factory.h b/c_glib/arrow-dataset-glib/dataset-factory.h index 292a9ca70dd89..ce15babba4ac1 100644 --- a/c_glib/arrow-dataset-glib/dataset-factory.h +++ b/c_glib/arrow-dataset-glib/dataset-factory.h @@ -100,7 +100,7 @@ GARROW_AVAILABLE_IN_5_0 gboolean gadataset_file_system_dataset_factory_add_selector( GADatasetFileSystemDatasetFactory *factory, - GArrorFileSelector *selector, + GArrowFileSelector *selector, GError **error); */ diff --git a/c_glib/arrow-flight-sql-glib/server.h b/c_glib/arrow-flight-sql-glib/server.h index 106b6e40db38f..90eb5ee7e4170 100644 --- a/c_glib/arrow-flight-sql-glib/server.h +++ b/c_glib/arrow-flight-sql-glib/server.h @@ -202,7 +202,7 @@ G_DECLARE_DERIVABLE_TYPE(GAFlightSQLServer, /** * GAFlightSQLServerClass: * @get_flight_info_statement: A virtual function to implement - * `GetFlightInfoStatment` API that gets a #GAFlightInfo for executing a + * `GetFlightInfoStatement` API that gets a #GAFlightInfo for executing a * SQL query. * @do_get_statement: A virtual function to implement `DoGetStatement` API * that gets a #GAFlightDataStream containing the query results. diff --git a/c_glib/arrow-glib/array-builder.cpp b/c_glib/arrow-glib/array-builder.cpp index ee6ec69e98366..5171161970bf5 100644 --- a/c_glib/arrow-glib/array-builder.cpp +++ b/c_glib/arrow-glib/array-builder.cpp @@ -5179,7 +5179,7 @@ garrow_binary_dictionary_array_builder_append_indices(GArrowBinaryDictionaryArra * garrow_binary_dictionary_array_builder_get_dictionary_length: * @builder: A #GArrowBinaryDictionaryArrayBuilder. * - * Returns: A number of entries in the dicitonary. + * Returns: A number of entries in the dictionary. * * Since: 2.0.0 */ @@ -5413,7 +5413,7 @@ garrow_string_dictionary_array_builder_append_indices(GArrowStringDictionaryArra * garrow_string_dictionary_array_builder_get_dictionary_length: * @builder: A #GArrowStringDictionaryArrayBuilder. * - * Returns: A number of entries in the dicitonary. + * Returns: A number of entries in the dictionary. * * Since: 2.0.0 */ diff --git a/c_glib/arrow-glib/buffer.cpp b/c_glib/arrow-glib/buffer.cpp index 58f47518c82e6..86d88cebd5ee4 100644 --- a/c_glib/arrow-glib/buffer.cpp +++ b/c_glib/arrow-glib/buffer.cpp @@ -307,7 +307,7 @@ garrow_buffer_get_data(GArrowBuffer *buffer) * @buffer: A #GArrowBuffer. * * Returns: (transfer full) (nullable): The data of the buffer. If the - * buffer is imutable, it returns %NULL. The data is owned by the + * buffer is immutable, it returns %NULL. The data is owned by the * buffer. You should not free the data. * * Since: 0.3.0 diff --git a/c_glib/arrow-glib/composite-array.cpp b/c_glib/arrow-glib/composite-array.cpp index 36d460c9e0276..6ca22e82389e4 100644 --- a/c_glib/arrow-glib/composite-array.cpp +++ b/c_glib/arrow-glib/composite-array.cpp @@ -2077,7 +2077,7 @@ garrow_run_end_encoded_array_find_physical_offset( * range of values from offset to length. * * Avoid calling this function if the physical length can be - * estabilished in some other way (e.g. when iterating over the runs + * established in some other way (e.g. when iterating over the runs * sequentially until the end). 
This function uses binary-search, so * it has a O(log N) cost. * diff --git a/c_glib/arrow-glib/compute.cpp b/c_glib/arrow-glib/compute.cpp index 9692f277d183f..2467b4f6eda6d 100644 --- a/c_glib/arrow-glib/compute.cpp +++ b/c_glib/arrow-glib/compute.cpp @@ -230,7 +230,7 @@ G_BEGIN_DECLS * #GArrowUTF8NormalizeOptions is a class to customize the * `utf8_normalize` function. * - * #GArrowQuantileOptions is a class to customize the `qunatile` + * #GArrowQuantileOptions is a class to customize the `quantile` * function. * * #GArrowIndexOptions is a class to customize the `index` function. @@ -5423,7 +5423,7 @@ garrow_record_batch_take(GArrowRecordBatch *record_batch, * @options: (nullable): A #GArrowFilterOptions. * @error: (nullable): Return location for a #GError or %NULL. * - * Returns: (nullable) (transfer full): The #GArrowArray filterd + * Returns: (nullable) (transfer full): The #GArrowArray filtered * with a boolean selection filter. Nulls in the filter will * result in nulls in the output. * @@ -5659,7 +5659,7 @@ garrow_table_sort_indices(GArrowTable *table, * @options: (nullable): A #GArrowFilterOptions. * @error: (nullable): Return location for a #GError or %NULL. * - * Returns: (nullable) (transfer full): The #GArrowTable filterd + * Returns: (nullable) (transfer full): The #GArrowTable filtered * with a boolean selection filter. Nulls in the filter will * result in nulls in the output. * @@ -5698,7 +5698,7 @@ garrow_table_filter(GArrowTable *table, * @options: (nullable): A #GArrowFilterOptions. * @error: (nullable): Return location for a #GError or %NULL. * - * Returns: (nullable) (transfer full): The #GArrowTable filterd + * Returns: (nullable) (transfer full): The #GArrowTable filtered * with a chunked array filter. Nulls in the filter will * result in nulls in the output. * @@ -5739,7 +5739,7 @@ garrow_table_filter_chunked_array(GArrowTable *table, * @options: (nullable): A #GArrowFilterOptions. * @error: (nullable): Return location for a #GError or %NULL. * - * Returns: (nullable) (transfer full): The #GArrowChunkedArray filterd + * Returns: (nullable) (transfer full): The #GArrowChunkedArray filtered * with a boolean selection filter. Nulls in the filter will * result in nulls in the output. * @@ -5778,7 +5778,7 @@ garrow_chunked_array_filter(GArrowChunkedArray *chunked_array, * @options: (nullable): A #GArrowFilterOptions. * @error: (nullable): Return location for a #GError or %NULL. * - * Returns: (nullable) (transfer full): The #GArrowChunkedArray filterd + * Returns: (nullable) (transfer full): The #GArrowChunkedArray filtered * with a chunked array filter. Nulls in the filter will * result in nulls in the output. * @@ -5819,7 +5819,7 @@ garrow_chunked_array_filter_chunked_array(GArrowChunkedArray *chunked_array, * @options: (nullable): A #GArrowFilterOptions. * @error: (nullable): Return location for a #GError or %NULL. * - * Returns: (nullable) (transfer full): The #GArrowRecordBatch filterd + * Returns: (nullable) (transfer full): The #GArrowRecordBatch filtered * with a boolean selection filter. Nulls in the filter will * result in nulls in the output. * diff --git a/c_glib/arrow-glib/expression.cpp b/c_glib/arrow-glib/expression.cpp index 419f668823d0a..48b5fd055e4d0 100644 --- a/c_glib/arrow-glib/expression.cpp +++ b/c_glib/arrow-glib/expression.cpp @@ -166,7 +166,7 @@ garrow_field_expression_class_init(GArrowFieldExpressionClass *klass) * @reference: A field name or dot path. * @error: (nullable): Return location for a #GError or %NULL. 
* - * Returns: A newly created #GArrowFieldExpression on sucess, %NULL on + * Returns: A newly created #GArrowFieldExpression on success, %NULL on * error. * * Since: 6.0.0 diff --git a/c_glib/arrow-glib/input-stream.cpp b/c_glib/arrow-glib/input-stream.cpp index 844c83d629b8f..b65e89845480c 100644 --- a/c_glib/arrow-glib/input-stream.cpp +++ b/c_glib/arrow-glib/input-stream.cpp @@ -697,7 +697,7 @@ garrow_file_input_stream_new_file_descriptor(gint file_descriptor, /** * garrow_file_input_stream_get_file_descriptor: - * @stream: A #GArrowFileInuptStream. + * @stream: A #GArrowFileInputStream. * * Returns: The file descriptor of @stream. * diff --git a/c_glib/arrow-glib/local-file-system.cpp b/c_glib/arrow-glib/local-file-system.cpp index c4b29658e31c9..ae503bf73136b 100644 --- a/c_glib/arrow-glib/local-file-system.cpp +++ b/c_glib/arrow-glib/local-file-system.cpp @@ -28,7 +28,7 @@ G_BEGIN_DECLS * @title: Local file system classes * @include: arrow-glib/arrow-glib.h * - * #GArrowLocalFileSystemOptions is a class for specifyiing options of + * #GArrowLocalFileSystemOptions is a class for specifying options of * an instance of #GArrowLocalFileSystem. * * #GArrowLocalFileSystem is a class for an implementation of a file system diff --git a/c_glib/arrow-glib/metadata-version.cpp b/c_glib/arrow-glib/metadata-version.cpp index 69cbaec37e94a..ffefeb2f08dcd 100644 --- a/c_glib/arrow-glib/metadata-version.cpp +++ b/c_glib/arrow-glib/metadata-version.cpp @@ -22,7 +22,7 @@ /** * SECTION: metadata-version * @title: GArrowMetadataVersion - * @short_description: Metadata version mapgging between Arrow and arrow-glib + * @short_description: Metadata version mapping between Arrow and arrow-glib * * #GArrowMetadataVersion provides metadata versions corresponding * to `arrow::ipc::MetadataVersion` values. diff --git a/c_glib/arrow-glib/output-stream.cpp b/c_glib/arrow-glib/output-stream.cpp index b5b51584c496f..a9317e9f28007 100644 --- a/c_glib/arrow-glib/output-stream.cpp +++ b/c_glib/arrow-glib/output-stream.cpp @@ -395,13 +395,13 @@ namespace garrow { int64_t n_bytes) override { GError *error = NULL; gsize n_written_bytes; - auto successed = g_output_stream_write_all(output_stream_, + auto succeeded = g_output_stream_write_all(output_stream_, data, n_bytes, &n_written_bytes, NULL, &error); - if (successed) { + if (succeeded) { position_ += n_written_bytes; return arrow::Status::OK(); } else { @@ -415,8 +415,8 @@ namespace garrow { arrow::Status Flush() override { GError *error = NULL; - auto successed = g_output_stream_flush(output_stream_, NULL, &error); - if (successed) { + auto succeeded = g_output_stream_flush(output_stream_, NULL, &error); + if (succeeded) { return arrow::Status::OK(); } else { return garrow_error_to_status(error, diff --git a/c_glib/parquet-glib/arrow-file-reader.cpp b/c_glib/parquet-glib/arrow-file-reader.cpp index fd21a9e9c3ab9..f7a4b09f814ac 100644 --- a/c_glib/parquet-glib/arrow-file-reader.cpp +++ b/c_glib/parquet-glib/arrow-file-reader.cpp @@ -123,7 +123,7 @@ gparquet_arrow_file_reader_class_init(GParquetArrowFileReaderClass *klass) /** * gparquet_arrow_file_reader_new_arrow: * @source: Arrow source to be read. - * @error: (nullable): Return locatipcn for a #GError or %NULL. + * @error: (nullable): Return location for a #GError or %NULL. * * Returns: (nullable): A newly created #GParquetArrowFileReader. * @@ -152,7 +152,7 @@ gparquet_arrow_file_reader_new_arrow(GArrowSeekableInputStream *source, /** * gparquet_arrow_file_reader_new_path: * @path: Path to be read. 
- * @error: (nullable): Return locatipcn for a #GError or %NULL. + * @error: (nullable): Return location for a #GError or %NULL. * * Returns: (nullable): A newly created #GParquetArrowFileReader. * @@ -189,7 +189,7 @@ gparquet_arrow_file_reader_new_path(const gchar *path, /** * gparquet_arrow_file_reader_read_table: * @reader: A #GParquetArrowFileReader. - * @error: (nullable): Return locatipcn for a #GError or %NULL. + * @error: (nullable): Return location for a #GError or %NULL. * * Returns: (transfer full) (nullable): A read #GArrowTable. * @@ -220,7 +220,7 @@ gparquet_arrow_file_reader_read_table(GParquetArrowFileReader *reader, * If an index is negative, the index is counted backward from the * end of the columns. `-1` means the last column. * @n_column_indices: The number of elements of @column_indices. - * @error: (nullable): Return locatipcn for a #GError or %NULL. + * @error: (nullable): Return location for a #GError or %NULL. * * Returns: (transfer full) (nullable): A read #GArrowTable. * @@ -273,7 +273,7 @@ gparquet_arrow_file_reader_read_row_group(GParquetArrowFileReader *reader, /** * gparquet_arrow_file_reader_get_schema: * @reader: A #GParquetArrowFileReader. - * @error: (nullable): Return locatipcn for a #GError or %NULL. + * @error: (nullable): Return location for a #GError or %NULL. * * Returns: (transfer full) (nullable): A got #GArrowSchema. * @@ -302,7 +302,7 @@ gparquet_arrow_file_reader_get_schema(GParquetArrowFileReader *reader, * @i: The index of the column to be read. * If an index is negative, the index is counted backward from the * end of the columns. `-1` means the last column. - * @error: (nullable): Return locatipcn for a #GError or %NULL. + * @error: (nullable): Return location for a #GError or %NULL. * * Returns: (transfer full) (nullable): A read #GArrowChunkedArray. * diff --git a/c_glib/parquet-glib/arrow-file-writer.cpp b/c_glib/parquet-glib/arrow-file-writer.cpp index 537e833053214..f923edbf5e5c8 100644 --- a/c_glib/parquet-glib/arrow-file-writer.cpp +++ b/c_glib/parquet-glib/arrow-file-writer.cpp @@ -406,7 +406,7 @@ gparquet_arrow_file_writer_class_init(GParquetArrowFileWriterClass *klass) * @schema: Arrow schema for written data. * @sink: Arrow output stream to be written. * @writer_properties: (nullable): A #GParquetWriterProperties. - * @error: (nullable): Return locatipcn for a #GError or %NULL. + * @error: (nullable): Return location for a #GError or %NULL. * * Returns: (nullable): A newly created #GParquetArrowFileWriter. * @@ -451,7 +451,7 @@ gparquet_arrow_file_writer_new_arrow(GArrowSchema *schema, * @schema: Arrow schema for written data. * @path: Path to be read. * @writer_properties: (nullable): A #GParquetWriterProperties. - * @error: (nullable): Return locatipcn for a #GError or %NULL. + * @error: (nullable): Return location for a #GError or %NULL. * * Returns: (nullable): A newly created #GParquetArrowFileWriter. * @@ -505,7 +505,7 @@ gparquet_arrow_file_writer_new_path(GArrowSchema *schema, * @writer: A #GParquetArrowFileWriter. * @table: A table to be written. * @chunk_size: The max number of rows in a row group. - * @error: (nullable): Return locatipcn for a #GError or %NULL. + * @error: (nullable): Return location for a #GError or %NULL. * * Returns: %TRUE on success, %FALSE if there was an error. * @@ -528,7 +528,7 @@ gparquet_arrow_file_writer_write_table(GParquetArrowFileWriter *writer, /** * gparquet_arrow_file_writer_close: * @writer: A #GParquetArrowFileWriter. - * @error: (nullable): Return locatipcn for a #GError or %NULL. 
+ * @error: (nullable): Return location for a #GError or %NULL. * * Returns: %TRUE on success, %FALSE if there was an error. * diff --git a/c_glib/parquet-glib/statistics.cpp b/c_glib/parquet-glib/statistics.cpp index 5dae756bf62e2..596768d98007c 100644 --- a/c_glib/parquet-glib/statistics.cpp +++ b/c_glib/parquet-glib/statistics.cpp @@ -169,7 +169,7 @@ gparquet_statistics_get_n_nulls(GParquetStatistics *statistics) } /** - * gparquet_statistics_has_n_distinct_valuess: + * gparquet_statistics_has_n_distinct_values: * @statistics: A #GParquetStatistics. * * Returns: %TRUE if the number of distinct values is set, %FALSE otherwise. diff --git a/c_glib/test/flight/test-client-options.rb b/c_glib/test/flight/test-client-options.rb index b1a67c60699c2..e1f3a2f850a44 100644 --- a/c_glib/test/flight/test-client-options.rb +++ b/c_glib/test/flight/test-client-options.rb @@ -51,7 +51,7 @@ def test_write_size_limit_bytes assert_equal(100, @options.write_size_limit_bytes) end - def test_disable_server_verifiation + def test_disable_server_verification assert do not @options.disable_server_verification? end diff --git a/c_glib/test/gandiva/test-native-function.rb b/c_glib/test/gandiva/test-native-function.rb index 630a1f7c32d2a..4d4d6fc7d3f66 100644 --- a/c_glib/test/gandiva/test-native-function.rb +++ b/c_glib/test/gandiva/test-native-function.rb @@ -59,7 +59,7 @@ def test_to_string modulo.to_s) end - sub_test_case("get_result_nullbale_type") do + sub_test_case("get_result_nullable_type") do def test_if_null assert_equal(Gandiva::ResultNullableType::IF_NULL, @not.result_nullable_type) diff --git a/c_glib/test/helper/readable.rb b/c_glib/test/helper/readable.rb index 81bf0795c6b50..cea5faf7681de 100644 --- a/c_glib/test/helper/readable.rb +++ b/c_glib/test/helper/readable.rb @@ -19,7 +19,7 @@ module Helper module Readable def read_table(input, type: :file) if input.is_a?(Arrow::Buffer) - input_stream = Arrow::BufferIntputStream.new(input) + input_stream = Arrow::BufferInputStream.new(input) else input_stream = Arrow::FileInputStream.new(input) end diff --git a/c_glib/test/test-array.rb b/c_glib/test/test-array.rb index c03aecf1732b3..8b96efca0b95b 100644 --- a/c_glib/test/test-array.rb +++ b/c_glib/test/test-array.rb @@ -141,12 +141,12 @@ def test_no_diff def test_diff array = build_string_array(["Start", "Shutdown", "Reboot"]) - other_array = build_string_array(["Start", "Shutdonw", "Reboot"]) + other_array = build_string_array(["Start", "Shutdown_", "Reboot"]) assert_equal(<<-STRING.chomp, array.diff_unified(other_array)) @@ -1, +1 @@ -"Shutdown" -+"Shutdonw" ++"Shutdown_" STRING end diff --git a/c_glib/test/test-decimal128-data-type.rb b/c_glib/test/test-decimal128-data-type.rb index 92f2f47f0bd71..f0e62c9d131b4 100644 --- a/c_glib/test/test-decimal128-data-type.rb +++ b/c_glib/test/test-decimal128-data-type.rb @@ -41,7 +41,7 @@ def test_scale assert_equal(2, data_type.scale) end - def test_deciaml_data_type_new + def test_decimal_data_type_new assert_equal(Arrow::Decimal128DataType.new(8, 2), Arrow::DecimalDataType.new(8, 2)) end diff --git a/c_glib/test/test-decimal256-data-type.rb b/c_glib/test/test-decimal256-data-type.rb index b26f7396043cf..6d803f7ce9020 100644 --- a/c_glib/test/test-decimal256-data-type.rb +++ b/c_glib/test/test-decimal256-data-type.rb @@ -41,7 +41,7 @@ def test_scale assert_equal(2, data_type.scale) end - def test_deciaml_data_type_new + def test_decimal_data_type_new assert_equal(Arrow::Decimal256DataType.new(39, 1), Arrow::DecimalDataType.new(39, 1)) end diff --git 
a/c_glib/test/test-dictionary-array-builder.rb b/c_glib/test/test-dictionary-array-builder.rb index 4531e44f4a4ee..2d14563c840c8 100644 --- a/c_glib/test/test-dictionary-array-builder.rb +++ b/c_glib/test/test-dictionary-array-builder.rb @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -class TestDictinaryArrayBuilder < Test::Unit::TestCase +class TestDictionaryArrayBuilder < Test::Unit::TestCase include Helper::Buildable def setup diff --git a/c_glib/test/test-extension-data-type.rb b/c_glib/test/test-extension-data-type.rb index 59c6395e98df2..6c114b81e2c33 100644 --- a/c_glib/test/test-extension-data-type.rb +++ b/c_glib/test/test-extension-data-type.rb @@ -91,7 +91,7 @@ def test_wrap_chunked_array ["a" * 16, nil]) storage2 = build_fixed_size_binary_array(data_type.storage_data_type, ["c" * 16]) - chunkd_array = Arrow::ChunkedArray.new([storage1, storage2]) + chunked_array = Arrow::ChunkedArray.new([storage1, storage2]) extension_chunked_array = data_type.wrap_chunked_array(chunked_array) assert_equal([ data_type, diff --git a/c_glib/test/test-fixed-size-binary-array.rb b/c_glib/test/test-fixed-size-binary-array.rb index 29189e78a7d9c..8de32c57a622b 100644 --- a/c_glib/test/test-fixed-size-binary-array.rb +++ b/c_glib/test/test-fixed-size-binary-array.rb @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -class TestFixedSizeBinaryrray < Test::Unit::TestCase +class TestFixedSizeBinaryArray < Test::Unit::TestCase include Helper::Buildable def setup diff --git a/ci/conan/all/conandata.yml b/ci/conan/all/conandata.yml index 376fdfbfa7ad5..7402272a4b366 100644 --- a/ci/conan/all/conandata.yml +++ b/ci/conan/all/conandata.yml @@ -45,21 +45,21 @@ sources: patches: "8.0.1": - patch_file: "patches/8.0.0-0005-install-utils.patch" - patch_description: "enable utilis installation" + patch_description: "enable utils installation" patch_type: "conan" - patch_file: "patches/8.0.0-0006-fix-cmake.patch" patch_description: "use cci package" patch_type: "conan" "8.0.0": - patch_file: "patches/8.0.0-0005-install-utils.patch" - patch_description: "enable utilis installation" + patch_description: "enable utils installation" patch_type: "conan" - patch_file: "patches/8.0.0-0006-fix-cmake.patch" patch_description: "use cci package" patch_type: "conan" "7.0.0": - patch_file: "patches/7.0.0-0006-install-utils.patch" - patch_description: "enable utilis installation" + patch_description: "enable utils installation" patch_type: "conan" - patch_file: "patches/7.0.0-0007-fix-cmake.patch" patch_description: "use cci package" diff --git a/ci/conan/all/conanfile.py b/ci/conan/all/conanfile.py index b32219f63c8e5..7e87f82e7e018 100644 --- a/ci/conan/all/conanfile.py +++ b/ci/conan/all/conanfile.py @@ -328,7 +328,7 @@ def _with_boost(self, required=False): return bool(self.options.with_boost) def _with_thrift(self, required=False): - # No self.options.with_thift exists + # No self.options.with_thrift exists return bool(required or self._parquet()) def _with_utf8proc(self, required=False): diff --git a/ci/conan/all/test_package/test_package.cpp b/ci/conan/all/test_package/test_package.cpp index 42cab6cc76e7a..fb54b040f44e6 100644 --- a/ci/conan/all/test_package/test_package.cpp +++ b/ci/conan/all/test_package/test_package.cpp @@ -78,7 +78,7 @@ arrow::Status VectorToColumnarTable(const std::vector& rows, // Indicate the start of a new list row. This will memorise the current // offset in the values builder. 
ARROW_RETURN_NOT_OK(components_builder.Append()); - // Store the actual values. The final nullptr argument tells the underyling + // Store the actual values. The final nullptr argument tells the underlying // builder that all added values are valid, i.e. non-null. ARROW_RETURN_NOT_OK(cost_components_builder.AppendValues(row.cost_components.data(), row.cost_components.size())); diff --git a/ci/docker/conda-cpp.dockerfile b/ci/docker/conda-cpp.dockerfile index b635e5e93455c..7a54dcc86f8fa 100644 --- a/ci/docker/conda-cpp.dockerfile +++ b/ci/docker/conda-cpp.dockerfile @@ -22,7 +22,7 @@ FROM ${repo}:${arch}-conda COPY ci/scripts/install_minio.sh /arrow/ci/scripts RUN /arrow/ci/scripts/install_minio.sh latest /opt/conda -# Unless overriden use Python 3.10 +# Unless overridden use Python 3.10 # Google GCS fails building with Python 3.11 at the moment. ARG python=3.10 diff --git a/ci/docker/java-jni-manylinux-201x.dockerfile b/ci/docker/java-jni-manylinux-201x.dockerfile index 207bea72b5375..8b73c73c1d240 100644 --- a/ci/docker/java-jni-manylinux-201x.dockerfile +++ b/ci/docker/java-jni-manylinux-201x.dockerfile @@ -18,7 +18,7 @@ ARG base FROM ${base} -# Install the libaries required by the Gandiva to run +# Install the libraries required by the Gandiva to run # Use enable llvm[enable-rtti] in the vcpkg.json to avoid link problems in Gandiva RUN vcpkg install \ --clean-after-build \ diff --git a/ci/docker/python-wheel-manylinux.dockerfile b/ci/docker/python-wheel-manylinux.dockerfile index 0f7779c878505..0a50d450c225a 100644 --- a/ci/docker/python-wheel-manylinux.dockerfile +++ b/ci/docker/python-wheel-manylinux.dockerfile @@ -73,7 +73,7 @@ ENV CMAKE_BUILD_TYPE=${build_type} \ VCPKG_FEATURE_FLAGS="manifests" COPY ci/vcpkg/vcpkg.json arrow/ci/vcpkg/ # cannot use the S3 feature here because while aws-sdk-cpp=1.9.160 contains -# ssl related fixies as well as we can patch the vcpkg portfile to support +# ssl related fixes as well as we can patch the vcpkg portfile to support # arm machines it hits ARROW-15141 where we would need to fall back to 1.8.186 # but we cannot patch those portfiles since vcpkg-tool handles the checkout of # previous versions => use bundled S3 build diff --git a/ci/docker/python-wheel-windows-vs2017.dockerfile b/ci/docker/python-wheel-windows-vs2017.dockerfile index b6a8125a5c4fc..faf07800c956a 100644 --- a/ci/docker/python-wheel-windows-vs2017.dockerfile +++ b/ci/docker/python-wheel-windows-vs2017.dockerfile @@ -55,7 +55,7 @@ ENV CMAKE_BUILD_TYPE=${build_type} \ VCPKG_FEATURE_FLAGS="manifests" COPY ci/vcpkg/vcpkg.json arrow/ci/vcpkg/ # cannot use the S3 feature here because while aws-sdk-cpp=1.9.160 contains -# ssl related fixies as well as we can patch the vcpkg portfile to support +# ssl related fixes as well as we can patch the vcpkg portfile to support # arm machines it hits ARROW-15141 where we would need to fall back to 1.8.186 # but we cannot patch those portfiles since vcpkg-tool handles the checkout of # previous versions => use bundled S3 build @@ -97,4 +97,4 @@ RUN python -m pip install -r arrow/python/requirements-wheel-build.txt # For debugging purposes # RUN wget --no-check-certificate https://github.com/lucasg/Dependencies/releases/download/v1.10/Dependencies_x64_Release.zip -# RUN unzip Dependencies_x64_Release.zip -d Dependencies && setx path "%path%;C:\Depencencies" +# RUN unzip Dependencies_x64_Release.zip -d Dependencies && setx path "%path%;C:\Dependencies" diff --git a/ci/scripts/install_python.sh b/ci/scripts/install_python.sh index 
8487090f7ad26..5f962f02b911b 100755 --- a/ci/scripts/install_python.sh +++ b/ci/scripts/install_python.sh @@ -21,7 +21,7 @@ set -eu declare -A platforms platforms=([windows]=Windows - [macos]=MacOSX + [macos]=macOS [linux]=Linux) declare -A versions @@ -43,7 +43,7 @@ platform=${platforms[$1]} version=$2 full_version=${versions[$2]} -if [ $platform = "MacOSX" ]; then +if [ $platform = "macOS" ]; then echo "Downloading Python installer..." if [ "$(uname -m)" = "arm64" ] || [ "$version" = "3.10" ] || [ "$version" = "3.11" ] || [ "$version" = "3.12" ]; then diff --git a/ci/scripts/install_sccache.sh b/ci/scripts/install_sccache.sh index 902fb69ec6b67..0346c0cc9ce7d 100755 --- a/ci/scripts/install_sccache.sh +++ b/ci/scripts/install_sccache.sh @@ -56,7 +56,7 @@ if [ ! -d $PREFIX ]; then mkdir -p $PREFIX fi -# Extract only the sccache binary into $PREFIX and ignore README and LCIENSE. +# Extract only the sccache binary into $PREFIX and ignore README and LICENSE. # --wildcards doesn't work on busybox. tar -xzvf $SCCACHE_ARCHIVE --strip-component=1 --directory $PREFIX --exclude="sccache*/*E*E*" chmod u+x $PREFIX/sccache diff --git a/ci/scripts/integration_spark.sh b/ci/scripts/integration_spark.sh index 6e20e77032952..424ac5994653a 100755 --- a/ci/scripts/integration_spark.sh +++ b/ci/scripts/integration_spark.sh @@ -27,7 +27,7 @@ test_pyarrow_only=${3:-false} # Spark branch to checkout spark_version=${SPARK_VERSION:-master} -# Use old behavior that always dropped tiemzones. +# Use old behavior that always dropped timezones. export PYARROW_IGNORE_TIMEZONE=1 if [ "${SPARK_VERSION:1:2}" == "2." ]; then diff --git a/ci/scripts/r_valgrind.sh b/ci/scripts/r_valgrind.sh index fe015cbb0c412..a14cb803ca898 100755 --- a/ci/scripts/r_valgrind.sh +++ b/ci/scripts/r_valgrind.sh @@ -33,7 +33,7 @@ ${R_BIN} CMD INSTALL ${INSTALL_ARGS} arrow*.tar.gz pushd tests # to generate suppression files run: -# ${R_BIN} --vanilla -d "valgrind --tool=memcheck --leak-check=full --track-origins=yes --gen-suppressions=all --log-file=memcheck.log" -f testtthat.supp +# ${R_BIN} --vanilla -d "valgrind --tool=memcheck --leak-check=full --track-origins=yes --gen-suppressions=all --log-file=memcheck.log" -f testthat.supp ${R_BIN} --vanilla -d "valgrind --tool=memcheck --leak-check=full --track-origins=yes --suppressions=/${1}/ci/etc/valgrind-cran.supp" -f testthat.R |& tee testthat.out # valgrind --error-exitcode=1 should return an erroring exit code that we can catch, diff --git a/cmake-format.py b/cmake-format.py index 3e77733f4d1ee..b8fc8939692da 100644 --- a/cmake-format.py +++ b/cmake-format.py @@ -72,5 +72,5 @@ first_comment_is_literal = True # If comment markup is enabled, don't reflow any comment block which - # matchesthis (regex) pattern. Default is `None` (disabled). + # matches this (regex) pattern. Default is `None` (disabled).
literal_comment_pattern = None diff --git a/cpp/CHANGELOG_PARQUET.md b/cpp/CHANGELOG_PARQUET.md index 06a09c20f0ef9..68aa8386b5186 100644 --- a/cpp/CHANGELOG_PARQUET.md +++ b/cpp/CHANGELOG_PARQUET.md @@ -4,7 +4,7 @@ Parquet C++ 1.5.0 * [PARQUET-979] - [C++] Limit size of min, max or disable stats for long binary types * [PARQUET-1071] - [C++] parquet::arrow::FileWriter::Close is not idempotent * [PARQUET-1349] - [C++] PARQUET_RPATH_ORIGIN is not picked by the build - * [PARQUET-1334] - [C++] memory_map parameter seems missleading in parquet file opener + * [PARQUET-1334] - [C++] memory_map parameter seems misleading in parquet file opener * [PARQUET-1333] - [C++] Reading of files with dictionary size 0 fails on Windows with bad_alloc * [PARQUET-1283] - [C++] FormatStatValue appends trailing space to string and int96 * [PARQUET-1270] - [C++] Executable tools do not get installed @@ -13,7 +13,7 @@ Parquet C++ 1.5.0 * [PARQUET-1255] - [C++] Exceptions thrown in some tests * [PARQUET-1358] - [C++] index_page_offset should be unset as it is not supported. * [PARQUET-1357] - [C++] FormatStatValue truncates binary statistics on zero character - * [PARQUET-1319] - [C++] Pass BISON_EXECUTABLE to Thrift EP for MacOS + * [PARQUET-1319] - [C++] Pass BISON_EXECUTABLE to Thrift EP for macOS * [PARQUET-1313] - [C++] Compilation failure with VS2017 * [PARQUET-1315] - [C++] ColumnChunkMetaData.has_dictionary_page() should return bool, not int64_t * [PARQUET-1307] - [C++] memory-test fails with latest Arrow @@ -28,7 +28,7 @@ Parquet C++ 1.5.0 * [PARQUET-1346] - [C++] Protect against null values data in empty Arrow array * [PARQUET-1340] - [C++] Fix Travis Ci valgrind errors related to std::random_device * [PARQUET-1323] - [C++] Fix compiler warnings with clang-6.0 - * [PARQUET-1279] - Use ASSERT_NO_FATAIL_FAILURE in C++ unit tests + * [PARQUET-1279] - Use ASSERT_NO_FATAL_FAILURE in C++ unit tests * [PARQUET-1262] - [C++] Use the same BOOST_ROOT and Boost_NAMESPACE for Thrift * [PARQUET-1267] - replace "unsafe" std::equal by std::memcmp * [PARQUET-1360] - [C++] Minor API + style changes follow up to PARQUET-1348 @@ -89,7 +89,7 @@ Parquet C++ 1.4.0 ## New Feature * [PARQUET-1095] - [C++] Read and write Arrow decimal values - * [PARQUET-970] - Add Add Lz4 and Zstd compression codecs + * [PARQUET-970] - Add Lz4 and Zstd compression codecs ## Task * [PARQUET-1221] - [C++] Extend release README @@ -233,10 +233,10 @@ Parquet C++ 1.1.0 * [PARQUET-977] - Improve MSVC build * [PARQUET-957] - [C++] Add optional $PARQUET_BUILD_TOOLCHAIN environment variable option for configuring build environment * [PARQUET-961] - [C++] Strip debug symbols from libparquet libraries in release builds by default - * [PARQUET-954] - C++: Use Brolti 0.6 release + * [PARQUET-954] - C++: Use Brotli 0.6 release * [PARQUET-953] - [C++] Change arrow::FileWriter API to be initialized from a Schema, and provide for writing multiple tables * [PARQUET-941] - [C++] Stop needless Boost static library detection for CentOS 7 support - * [PARQUET-942] - [C++] Fix wrong variabe use in FindSnappy + * [PARQUET-942] - [C++] Fix wrong variable use in FindSnappy * [PARQUET-939] - [C++] Support Thrift_HOME CMake variable like FindSnappy does as Snappy_HOME * [PARQUET-940] - [C++] Fix Arrow library path detection * [PARQUET-937] - [C++] Support CMake < 3.4 again for Arrow detection @@ -278,7 +278,7 @@ Parquet C++ 1.0.0 * [PARQUET-614] - C++: Remove unneeded LZ4-related code * [PARQUET-604] - Install writer.h headers * [PARQUET-621] - C++: Uninitialised 
DecimalMetadata is read - * [PARQUET-620] - C++: Duplicate calls to ParquetFileWriter::Close cause duplicate metdata writes + * [PARQUET-620] - C++: Duplicate calls to ParquetFileWriter::Close cause duplicate metadata writes * [PARQUET-599] - ColumnWriter::RleEncodeLevels' size estimation might be wrong * [PARQUET-617] - C++: Enable conda build to work on systems with non-default C++ toolchains * [PARQUET-627] - Ensure that thrift headers are generated before source compilation @@ -339,7 +339,7 @@ Parquet C++ 1.0.0 * [PARQUET-626] - Fix builds due to unavailable llvm.org apt mirror * [PARQUET-629] - RowGroupSerializer should only close itself once * [PARQUET-472] - Clean up InputStream ownership semantics in ColumnReader - * [PARQUET-739] - Rle-decoding uses static buffer that is shared accross threads + * [PARQUET-739] - Rle-decoding uses static buffer that is shared across threads * [PARQUET-561] - ParquetFileReader::Contents PIMPL missing a virtual destructor * [PARQUET-892] - [C++] Clean up link library targets in CMake files * [PARQUET-454] - Address inconsistencies in boolean decoding @@ -401,12 +401,12 @@ Parquet C++ 1.0.0 * [PARQUET-653] - [C++] Re-enable -static-libstdc++ in dev artifact builds * [PARQUET-763] - C++: Expose ParquetFileReader through Arrow reader * [PARQUET-857] - [C++] Flatten parquet/encodings directory - * [PARQUET-862] - Provide defaut cache size values if CPU info probing is not available + * [PARQUET-862] - Provide default cache size values if CPU info probing is not available * [PARQUET-689] - C++: Compress DataPages eagerly * [PARQUET-874] - [C++] Use default memory allocator from Arrow * [PARQUET-267] - Detach thirdparty code from build configuration. * [PARQUET-418] - Add a utility to print contents of a Parquet file to stdout - * [PARQUET-519] - Disable compiler warning supressions and fix all DEBUG build warnings + * [PARQUET-519] - Disable compiler warning suppressions and fix all DEBUG build warnings * [PARQUET-447] - Add Debug and Release build types and associated compiler flags * [PARQUET-868] - C++: Build snappy with optimizations * [PARQUET-894] - Fix compilation warning diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index bcb298407bd8b..d8dd3390aa62a 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -75,7 +75,7 @@ set(ARROW_VERSION "15.0.0-SNAPSHOT") string(REGEX MATCH "^[0-9]+\\.[0-9]+\\.[0-9]+" ARROW_BASE_VERSION "${ARROW_VERSION}") -# if no build build type is specified, default to release builds +# if no build type is specified, default to release builds if(NOT DEFINED CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE Release diff --git a/cpp/CMakePresets.json b/cpp/CMakePresets.json index a15b204c39757..9d99b3b2a79e0 100644 --- a/cpp/CMakePresets.json +++ b/cpp/CMakePresets.json @@ -428,7 +428,7 @@ "base-benchmarks", "features-maximal" ], - "displayName": "Benchmarking build with with everything enabled", + "displayName": "Benchmarking build with everything enabled", "cacheVariables": {} }, { diff --git a/cpp/apidoc/Doxyfile b/cpp/apidoc/Doxyfile index baa3b41e693c4..e19c933cd454f 100644 --- a/cpp/apidoc/Doxyfile +++ b/cpp/apidoc/Doxyfile @@ -239,7 +239,7 @@ QT_AUTOBRIEF = NO # tag to YES if you prefer the old behavior instead. # # Note that setting this tag to YES also means that rational rose comments are -# not recognized any more. +# not recognized anymore. # The default value is: NO. 
MULTILINE_CPP_IS_BRIEF = NO @@ -569,7 +569,7 @@ INTERNAL_DOCS = NO # If the CASE_SENSE_NAMES tag is set to NO then doxygen will only generate file # names in lower-case letters. If set to YES, upper-case letters are also # allowed. This is useful if you have classes or files whose names only differ -# in case and if your file system supports case sensitive file names. Windows +# in case and if your file system supports case-sensitive file names. Windows # (including Cygwin) ands Mac users are advised to set this option to NO. # The default value is: system dependent. @@ -734,7 +734,7 @@ SHOW_NAMESPACES = YES # The FILE_VERSION_FILTER tag can be used to specify a program or script that # doxygen should invoke to get the current version for each file (typically from # the version control system). Doxygen will invoke the program by executing (via -# popen()) the command command input-file, where command is the value of the +# popen()) the command input-file, where command is the value of the # FILE_VERSION_FILTER tag, and input-file is the name of an input file provided # by doxygen. Whatever the program writes to standard output is used as the file # version. For an example see the documentation. diff --git a/cpp/build-support/cpplint.py b/cpp/build-support/cpplint.py index cf1859bb6d48e..642b7b67208bc 100755 --- a/cpp/build-support/cpplint.py +++ b/cpp/build-support/cpplint.py @@ -873,7 +873,7 @@ # Files to exclude from linting. This is set by the --exclude flag. _excludes = None -# Whether to supress all PrintInfo messages, UNRELATED to --quiet flag +# Whether to suppress all PrintInfo messages, UNRELATED to --quiet flag _quiet = False # The allowed line length of files. @@ -1001,7 +1001,7 @@ def ParseNolintSuppressions(filename, raw_line, linenum, error): 'Unknown NOLINT error category: %s' % category) -def ProcessGlobalSuppresions(lines): +def ProcessGlobalSuppressions(lines): """Updates the list of global error suppressions. Parses any lint directives in the file that have global effect. @@ -1029,7 +1029,7 @@ def IsErrorSuppressedByNolint(category, linenum): """Returns true if the specified error category is suppressed on this line. Consults the global error_suppressions map populated by - ParseNolintSuppressions/ProcessGlobalSuppresions/ResetNolintSuppressions. + ParseNolintSuppressions/ProcessGlobalSuppressions/ResetNolintSuppressions. Args: category: str, the category of the error. @@ -1246,7 +1246,7 @@ def CheckNextIncludeOrder(self, header_type): if self._section <= self._MY_H_SECTION: self._section = self._MY_H_SECTION else: - # This will always be the fallback because we're not sure + # This will always be the fallback because we aren't sure # enough that the header is associated with this file. self._section = self._OTHER_H_SECTION else: @@ -1271,7 +1271,7 @@ def __init__(self): self._filters_backup = self.filters[:] self.counting = 'total' # In what way are we counting errors? self.errors_by_category = {} # string to int dict storing error counts - self.quiet = False # Suppress non-error messagess? + self.quiet = False # Suppress non-error messages? 
# output format: # "emacs" - format that emacs can parse (default) @@ -1599,7 +1599,7 @@ def RepositoryName(self): repo = FileInfo(_repository).FullName() root_dir = project_dir while os.path.exists(root_dir): - # allow case insensitive compare on Windows + # allow case-insensitive compare on Windows if os.path.normcase(root_dir) == os.path.normcase(repo): return os.path.relpath(fullname, root_dir).replace('\\', '/') one_up_dir = os.path.dirname(root_dir) @@ -1765,7 +1765,7 @@ def Error(filename, linenum, category, confidence, message): def IsCppString(line): """Does line terminate so, that the next symbol is in string constant. - This function does not consider single-line nor multi-line comments. + This function does not consider comments at all. Args: line: is a partial line of code starting from the 0..n. @@ -1947,7 +1947,7 @@ def NumLines(self): def _CollapseStrings(elided): """Collapses strings and chars on a line to simple "" or '' blocks. - We nix strings first so we're not fooled by text like '"http://"' + We nix strings first so we aren't fooled by text like '"http://"' Args: elided: The line being processed. @@ -3481,7 +3481,7 @@ def CheckSpacingForFunctionCall(filename, clean_lines, linenum, error): # for nested parens ( (a+b) + c ). Likewise, there should never be # a space before a ( when it's a function argument. I assume it's a # function argument when the char before the whitespace is legal in - # a function name (alnum + _) and we're not starting a macro. Also ignore + # a function name (alnum + _) and we aren't starting a macro. Also ignore # pointers and references to arrays and functions coz they're too tricky: # we use a very simple way to recognize these: # " (something)(maybe-something)" or @@ -3870,7 +3870,7 @@ def CheckOperatorSpacing(filename, clean_lines, linenum, error): elif not Match(r'#.*include', line): # Look for < that is not surrounded by spaces. This is only # triggered if both sides are missing spaces, even though - # technically should should flag if at least one side is missing a + # technically it should flag if at least one side is missing a # space. This is done to avoid some false positives with shifts. match = Match(r'^(.*[^\s<])<[^\s=<,]', line) if match: @@ -6155,7 +6155,7 @@ def CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error, header_found = True # If we can't find the header file for a .cc, assume it's because we don't - # know where to look. In that case we'll give up as we're not sure they + # know where to look. In that case we'll give up as we aren't sure they # didn't include it in the .h file. # TODO(unknown): Do a better job of finding .h files so we are confident that # not having the .h file means there isn't one. 
@@ -6495,7 +6495,7 @@ def ProcessFileData(filename, file_extension, lines, error, ResetNolintSuppressions() CheckForCopyright(filename, lines, error) - ProcessGlobalSuppresions(lines) + ProcessGlobalSuppressions(lines) RemoveMultiLineComments(filename, lines, error) clean_lines = CleansedLines(lines) diff --git a/cpp/build-support/iwyu/mappings/boost-all.imp b/cpp/build-support/iwyu/mappings/boost-all.imp index 5427ae2ac54be..7c48acaf34163 100644 --- a/cpp/build-support/iwyu/mappings/boost-all.imp +++ b/cpp/build-support/iwyu/mappings/boost-all.imp @@ -57,7 +57,7 @@ { include: ["@", private, "", public ] }, #manually delete $ sed '/workarounds*\.hpp/d' -i boost-all.imp #also good idea to remove all lines referring to folders above (e.g., sed '/\/format\//d' -i boost-all.imp) -#programatically include: +#programmatically include: { include: ["", private, "", public ] }, { include: ["", private, "", public ] }, { include: ["", private, "", public ] }, diff --git a/cpp/cmake_modules/SetupCxxFlags.cmake b/cpp/cmake_modules/SetupCxxFlags.cmake index 8e8f687d06539..6940c6befacc7 100644 --- a/cpp/cmake_modules/SetupCxxFlags.cmake +++ b/cpp/cmake_modules/SetupCxxFlags.cmake @@ -73,7 +73,7 @@ if(ARROW_CPU_FLAG STREQUAL "x86") message(STATUS "Disable AVX512 support on MINGW for now") else() # Check for AVX512 support in the compiler. - set(OLD_CMAKE_REQURED_FLAGS ${CMAKE_REQUIRED_FLAGS}) + set(OLD_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS}) set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} ${ARROW_AVX512_FLAG}") check_cxx_source_compiles(" #ifdef _MSC_VER @@ -89,7 +89,7 @@ if(ARROW_CPU_FLAG STREQUAL "x86") return 0; }" CXX_SUPPORTS_AVX512) - set(CMAKE_REQUIRED_FLAGS ${OLD_CMAKE_REQURED_FLAGS}) + set(CMAKE_REQUIRED_FLAGS ${OLD_CMAKE_REQUIRED_FLAGS}) endif() endif() # Runtime SIMD level it can get from compiler and ARROW_RUNTIME_SIMD_LEVEL @@ -459,7 +459,7 @@ elseif(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID STRE if(CMAKE_HOST_SYSTEM_VERSION VERSION_LESS 20) # Avoid C++17 std::get 'not available' issue on macOS 10.13 - # This will be required until atleast R 4.4 is released and + # This will be required until at least R 4.4 is released and # CRAN (hopefully) stops checking on 10.13 string(APPEND CXX_ONLY_FLAGS " -D_LIBCPP_DISABLE_AVAILABILITY") endif() @@ -527,7 +527,7 @@ if(ARROW_CPU_FLAG STREQUAL "aarch64") set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -msve-vector-bits=${SVE_VECTOR_BITS}") else() set(ARROW_HAVE_SVE_SIZELESS ON) - add_definitions(-DARROW_HAVE_SVE_SIZELSS) + add_definitions(-DARROW_HAVE_SVE_SIZELESS) endif() endif() set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -march=${ARROW_ARMV8_MARCH}") diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 17ad8f45424c4..99d051acff932 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -1328,8 +1328,8 @@ macro(build_snappy) set(SNAPPY_CMAKE_ARGS ${EP_COMMON_CMAKE_ARGS} -DSNAPPY_BUILD_TESTS=OFF -DSNAPPY_BUILD_BENCHMARKS=OFF "-DCMAKE_INSTALL_PREFIX=${SNAPPY_PREFIX}") - # Snappy unconditionaly enables Werror when building with clang this can lead - # to build failues by way of new compiler warnings. This adds a flag to disable + # Snappy unconditionally enables -Werror when building with clang; this can lead + # to build failures by way of new compiler warnings. This adds a flag to disable # Werror to the very end of the invocation to override the snappy internal setting.
if(CMAKE_CXX_COMPILER_ID MATCHES "Clang") foreach(CONFIG DEBUG MINSIZEREL RELEASE RELWITHDEBINFO) @@ -4238,7 +4238,7 @@ macro(build_google_cloud_cpp_storage) target_include_directories(google-cloud-cpp::common BEFORE INTERFACE "${GOOGLE_CLOUD_CPP_INCLUDE_DIR}") # Refer to https://github.com/googleapis/google-cloud-cpp/blob/main/google/cloud/google_cloud_cpp_common.cmake - # (subsitute `main` for the SHA of the version we use) + # (substitute `main` for the SHA of the version we use) # Version 1.39.0 is at a different place (they refactored after): # https://github.com/googleapis/google-cloud-cpp/blob/29e5af8ca9b26cec62106d189b50549f4dc1c598/google/cloud/CMakeLists.txt#L146-L155 target_link_libraries(google-cloud-cpp::common @@ -5071,7 +5071,7 @@ if(ARROW_S3) if(APPLE) # CoreFoundation's path is hardcoded in the CMake files provided by - # aws-sdk-cpp to use the MacOSX SDK provided by XCode which makes + # aws-sdk-cpp to use the macOS SDK provided by XCode which makes # XCode a hard dependency. Command Line Tools is often used instead # of the full XCode suite, so let the linker to find it. set_target_properties(AWS::aws-c-common diff --git a/cpp/cmake_modules/Usevcpkg.cmake b/cpp/cmake_modules/Usevcpkg.cmake index ee2cfbc670c03..b6192468da342 100644 --- a/cpp/cmake_modules/Usevcpkg.cmake +++ b/cpp/cmake_modules/Usevcpkg.cmake @@ -20,7 +20,7 @@ message(STATUS "Using vcpkg to find dependencies") # ---------------------------------------------------------------------- # Define macros -# macro to list subdirectirectories (non-recursive) +# macro to list subdirectories (non-recursive) macro(list_subdirs SUBDIRS DIR) file(GLOB children_ RELATIVE ${DIR} diff --git a/cpp/examples/arrow/execution_plan_documentation_examples.cc b/cpp/examples/arrow/execution_plan_documentation_examples.cc index 00a23be293510..b92f5801c140d 100644 --- a/cpp/examples/arrow/execution_plan_documentation_examples.cc +++ b/cpp/examples/arrow/execution_plan_documentation_examples.cc @@ -342,7 +342,7 @@ arrow::Status TableSourceSinkExample() { /// /// Source-Filter-Table /// This example shows how a filter can be used in an execution plan, -/// to filter data from a source. The output from the exeuction plan +/// to filter data from a source. The output from the execution plan /// is collected into a table. 
arrow::Status ScanFilterSinkExample() { ARROW_ASSIGN_OR_RAISE(std::shared_ptr dataset, GetDataset()); diff --git a/cpp/examples/arrow/rapidjson_row_converter.cc b/cpp/examples/arrow/rapidjson_row_converter.cc index 3907e72121c6d..7448e9d04e564 100644 --- a/cpp/examples/arrow/rapidjson_row_converter.cc +++ b/cpp/examples/arrow/rapidjson_row_converter.cc @@ -75,7 +75,7 @@ class RowBatchBuilder { // Default implementation arrow::Status Visit(const arrow::Array& array) { return arrow::Status::NotImplemented( - "Can not convert to json document for array of type ", array.type()->ToString()); + "Cannot convert to json document for array of type ", array.type()->ToString()); } // Handles booleans, integers, floats @@ -346,7 +346,7 @@ class JsonValueConverter { // Default implementation arrow::Status Visit(const arrow::DataType& type) { return arrow::Status::NotImplemented( - "Can not convert json value to Arrow array of type ", type.ToString()); + "Cannot convert json value to Arrow array of type ", type.ToString()); } arrow::Status Visit(const arrow::Int64Type& type) { diff --git a/cpp/src/arrow/acero/accumulation_queue.h b/cpp/src/arrow/acero/accumulation_queue.h index 285790207f93c..a27b8b399ce47 100644 --- a/cpp/src/arrow/acero/accumulation_queue.h +++ b/cpp/src/arrow/acero/accumulation_queue.h @@ -82,7 +82,7 @@ class SequencingQueue { /// This method will be called on each batch in order. Calls to this method /// will be serialized and it will not be called reentrantly. This makes it /// safe to do things that rely on order but minimal time should be spent here - /// to avoid becoming a bottlneck. + /// to avoid becoming a bottleneck. /// /// \return a follow-up task that will be scheduled. The follow-up task(s) are /// is not guaranteed to run in any particular order. If nullopt is diff --git a/cpp/src/arrow/acero/aggregate_internal.h b/cpp/src/arrow/acero/aggregate_internal.h index 72537a7f7e3fe..5730d99f93f88 100644 --- a/cpp/src/arrow/acero/aggregate_internal.h +++ b/cpp/src/arrow/acero/aggregate_internal.h @@ -224,7 +224,7 @@ class ScalarAggregateNode : public ExecNode, public TracedNode { // Field indices corresponding to the segment-keys const std::vector segment_field_ids_; // Holds the value of segment keys of the most recent input batch - // The values are updated everytime an input batch is processed + // The values are updated every time an input batch is processed std::vector segmenter_values_; const std::vector> target_fieldsets_; diff --git a/cpp/src/arrow/acero/asof_join_node.cc b/cpp/src/arrow/acero/asof_join_node.cc index 4a3b6b199c4c0..2609905a0b552 100644 --- a/cpp/src/arrow/acero/asof_join_node.cc +++ b/cpp/src/arrow/acero/asof_join_node.cc @@ -472,7 +472,7 @@ class BackpressureController : public BackpressureControl { }; class InputState { - // InputState correponds to an input + // InputState corresponds to an input // Input record batches are queued up in InputState until processed and // turned into output record batches. 
@@ -1453,7 +1453,7 @@ class AsofJoinNode : public ExecNode { bool must_hash_; bool may_rehash_; // InputStates - // Each input state correponds to an input table + // Each input state corresponds to an input table std::vector> state_; std::mutex gate_; TolType tolerance_; diff --git a/cpp/src/arrow/acero/asof_join_node_test.cc b/cpp/src/arrow/acero/asof_join_node_test.cc index df3172b2a09bc..e400cc031693a 100644 --- a/cpp/src/arrow/acero/asof_join_node_test.cc +++ b/cpp/src/arrow/acero/asof_join_node_test.cc @@ -604,7 +604,7 @@ struct BasicTest { auto r0_types = init_types(all_types, [](T& t) { return t->byte_width() > 1; }); auto r1_types = init_types(all_types, [](T& t) { return t->byte_width() > 1; }); - // sample a limited number of type-combinations to keep the runnning time reasonable + // sample a limited number of type-combinations to keep the running time reasonable // the scoped-traces below help reproduce a test failure, should it happen auto start_time = std::chrono::system_clock::now(); auto seed = start_time.time_since_epoch().count(); @@ -1279,7 +1279,7 @@ TRACED_TEST(AsofJoinTest, TestUnsupportedOntype, { field("r0_v0", float32())})); }) -TRACED_TEST(AsofJoinTest, TestUnsupportedBytype, { +TRACED_TEST(AsofJoinTest, TestUnsupportedByType, { DoRunInvalidTypeTest(schema({field("time", int64()), field("key", list(int32())), field("l_v0", float64())}), schema({field("time", int64()), field("key", list(int32())), diff --git a/cpp/src/arrow/acero/expression_benchmark.cc b/cpp/src/arrow/acero/expression_benchmark.cc index 9799446ed6524..a57dd6b9e3f22 100644 --- a/cpp/src/arrow/acero/expression_benchmark.cc +++ b/cpp/src/arrow/acero/expression_benchmark.cc @@ -107,7 +107,7 @@ static void ExecuteScalarExpressionOverhead(benchmark::State& state, Expression } /// \brief Baseline benchmarks are implemented in pure C++ without arrow for performance -/// comparision. +/// comparison. template void ExecuteScalarExpressionBaseline(benchmark::State& state) { const auto rows_per_batch = static_cast(state.range(0)); @@ -193,7 +193,7 @@ BENCHMARK_CAPTURE(BindAndEvaluate, nested_scalar, struct ComplexExpressionBaseline { public: ComplexExpressionBaseline(size_t input_size) { - /* hack - cuts off a few elemets if the input size is not a multiple of 64 for + /* hack - cuts off a few elements if the input size is not a multiple of 64 for * simplicity. We can't use std::vector here since it slows down things * massively */ less_20.resize(input_size / 64); diff --git a/cpp/src/arrow/acero/hash_aggregate_test.cc b/cpp/src/arrow/acero/hash_aggregate_test.cc index 02e67927cc03f..a4874f3581040 100644 --- a/cpp/src/arrow/acero/hash_aggregate_test.cc +++ b/cpp/src/arrow/acero/hash_aggregate_test.cc @@ -261,7 +261,7 @@ Result MakeGroupByOutput(const std::vector& output_batches, return struct_arr; } - // The exec plan may reorder the output rows. The tests are all setup to expect ouptut + // The exec plan may reorder the output rows. The tests are all setup to expect output // in ascending order of keys. So we need to sort the result by the key columns. To do // that we create a table using the key columns, calculate the sort indices from that // table (sorting on all fields) and then use those indices to calculate our result. 
diff --git a/cpp/src/arrow/acero/partition_util.h b/cpp/src/arrow/acero/partition_util.h index 27cde61d58797..1413a8326ade0 100644 --- a/cpp/src/arrow/acero/partition_util.h +++ b/cpp/src/arrow/acero/partition_util.h @@ -33,11 +33,11 @@ class PartitionSort { public: /// \brief Bucket sort rows on partition ids in O(num_rows) time. /// - /// Include in the output exclusive cummulative sum of bucket sizes. + /// Include in the output exclusive cumulative sum of bucket sizes. /// This corresponds to ranges in the sorted array containing all row ids for /// each of the partitions. /// - /// prtn_ranges must be initailized and have at least prtn_ranges + 1 elements + /// prtn_ranges must be initialized and have at least prtn_ranges + 1 elements /// when this method returns prtn_ranges[i] will contains the total number of /// elements in partitions 0 through i. prtn_ranges[0] will be 0. /// diff --git a/cpp/src/arrow/acero/pivot_longer_node.cc b/cpp/src/arrow/acero/pivot_longer_node.cc index e54f00a20be3f..ea5ca44baa10b 100644 --- a/cpp/src/arrow/acero/pivot_longer_node.cc +++ b/cpp/src/arrow/acero/pivot_longer_node.cc @@ -135,7 +135,7 @@ class PivotLongerNode : public ExecNode, public TracedNode { for (std::size_t i = 0; i < measurement_types.size(); i++) { if (!measurement_types[i]) { return Status::Invalid( - "All row templates had nullopt for the meausrement column at index ", i, " (", + "All row templates had nullopt for the measurement column at index ", i, " (", options.measurement_field_names[i], ")"); } fields.push_back( diff --git a/cpp/src/arrow/acero/sorted_merge_node.cc b/cpp/src/arrow/acero/sorted_merge_node.cc index f3b934eda186b..4d4565a6bb5e7 100644 --- a/cpp/src/arrow/acero/sorted_merge_node.cc +++ b/cpp/src/arrow/acero/sorted_merge_node.cc @@ -95,7 +95,7 @@ class BackpressureController : public BackpressureControl { std::atomic& backpressure_counter_; }; -/// InputState correponds to an input. Input record batches are queued up in InputState +/// InputState corresponds to an input. Input record batches are queued up in InputState /// until processed and turned into output record batches. class InputState { public: diff --git a/cpp/src/arrow/acero/swiss_join.cc b/cpp/src/arrow/acero/swiss_join.cc index 3f11b89af39de..2f79ed299bb70 100644 --- a/cpp/src/arrow/acero/swiss_join.cc +++ b/cpp/src/arrow/acero/swiss_join.cc @@ -1433,16 +1433,16 @@ void SwissTableForJoinBuild::PrtnMerge(int prtn_id) { if (!no_payload_) { // Count sort payloads on key id // - // Start by computing inclusive cummulative sum of counters. + // Start by computing inclusive cumulative sum of counters. // uint32_t sum = 0; for (int64_t i = 0; i < num_keys; ++i) { sum += counters[i]; counters[i] = sum; } - // Now use cummulative sum of counters to obtain the target position in + // Now use cumulative sum of counters to obtain the target position in // the sorted order for each row. At the end of this process the counters - // will contain exclusive cummulative sum (instead of inclusive that is + // will contain exclusive cumulative sum (instead of inclusive that is // there at the beginning). // source_payload_ids.resize(prtn_state.key_ids.size()); @@ -1458,7 +1458,7 @@ void SwissTableForJoinBuild::PrtnMerge(int prtn_id) { } } else { // When there is no payload to process, we just need to compute exclusive - // cummulative sum of counters and add the base payload id to all of them. + // cumulative sum of counters and add the base payload id to all of them. 
+ // cumulative sum of counters and add the base payload id to all of them.
// uint32_t sum = 0; for (int64_t i = 0; i < num_keys; ++i) { diff --git a/cpp/src/arrow/acero/swiss_join_internal.h b/cpp/src/arrow/acero/swiss_join_internal.h index 88b80f06f57f2..6403b7a655e96 100644 --- a/cpp/src/arrow/acero/swiss_join_internal.h +++ b/cpp/src/arrow/acero/swiss_join_internal.h @@ -156,7 +156,7 @@ class RowArrayMerge { // All input sources must be initialized, but they can contain zero rows. // // Output in vector the first target row id for each source (exclusive - // cummulative sum of number of rows in sources). This output is optional, + // cumulative sum of number of rows in sources). This output is optional, // caller can pass in nullptr to indicate that it is not needed. // static Status PrepareForMerge(RowArray* target, const std::vector& sources, @@ -235,7 +235,7 @@ class SwissTableMerge { // All input sources must be initialized, but they can be empty. // // Output in a vector the first target group id for each source (exclusive - // cummulative sum of number of groups in sources). This output is optional, + // cumulative sum of number of groups in sources). This output is optional, // caller can pass in nullptr to indicate that it is not needed. // static Status PrepareForMerge(SwissTable* target, diff --git a/cpp/src/arrow/adapters/orc/adapter.h b/cpp/src/arrow/adapters/orc/adapter.h index 013be78600a8f..4ffff81f355f1 100644 --- a/cpp/src/arrow/adapters/orc/adapter.h +++ b/cpp/src/arrow/adapters/orc/adapter.h @@ -138,7 +138,7 @@ class ARROW_EXPORT ORCFileReader { /// \brief Get a stripe level record batch iterator. /// /// Each record batch will have up to `batch_size` rows. - /// NextStripeReader serves as a fine grained alternative to ReadStripe + /// NextStripeReader serves as a fine-grained alternative to ReadStripe /// which may cause OOM issues by loading the whole stripe into memory. /// /// Note this will only read rows for the current stripe, not the entire @@ -151,7 +151,7 @@ class ARROW_EXPORT ORCFileReader { /// \brief Get a stripe level record batch iterator. /// /// Each record batch will have up to `batch_size` rows. - /// NextStripeReader serves as a fine grained alternative to ReadStripe + /// NextStripeReader serves as a fine-grained alternative to ReadStripe /// which may cause OOM issues by loading the whole stripe into memory. /// /// Note this will only read rows for the current stripe, not the entire @@ -256,7 +256,7 @@ class ARROW_EXPORT ORCFileReader { int64_t GetFileLength(); /// \brief Get the serialized file tail. - /// Usefull if another reader of the same file wants to avoid re-reading + /// Useful if another reader of the same file wants to avoid re-reading /// the file tail. See ReadOptions.SetSerializedFileTail(). /// /// \return a string of bytes with the file tail diff --git a/cpp/src/arrow/array/array_binary.h b/cpp/src/arrow/array/array_binary.h index fd68a379ddbfb..19fdee61243d1 100644 --- a/cpp/src/arrow/array/array_binary.h +++ b/cpp/src/arrow/array/array_binary.h @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. 
-// Array accessor classes for Binary, LargeBinart, String, LargeString, +// Array accessor classes for Binary, LargeBinary, String, LargeString, // FixedSizeBinary #pragma once diff --git a/cpp/src/arrow/array/array_dict.cc b/cpp/src/arrow/array/array_dict.cc index 28fccdbfcffee..7fd76a1dae81b 100644 --- a/cpp/src/arrow/array/array_dict.cc +++ b/cpp/src/arrow/array/array_dict.cc @@ -212,7 +212,7 @@ Result> TransposeDictIndices( return out_data; } -struct CompactTransposeMapVistor { +struct CompactTransposeMapVisitor { const std::shared_ptr& data; arrow::MemoryPool* pool; std::unique_ptr output_map; @@ -306,11 +306,11 @@ Result> CompactTransposeMap( } const auto& dict_type = checked_cast(*data->type); - CompactTransposeMapVistor vistor{data, pool, nullptr, nullptr}; - RETURN_NOT_OK(VisitTypeInline(*dict_type.index_type(), &vistor)); + CompactTransposeMapVisitor visitor{data, pool, nullptr, nullptr}; + RETURN_NOT_OK(VisitTypeInline(*dict_type.index_type(), &visitor)); - out_compact_dictionary = vistor.out_compact_dictionary; - return std::move(vistor.output_map); + out_compact_dictionary = visitor.out_compact_dictionary; + return std::move(visitor.output_map); } } // namespace diff --git a/cpp/src/arrow/array/array_dict.h b/cpp/src/arrow/array/array_dict.h index 9aa0a7bcc2d66..bf376b51f8c94 100644 --- a/cpp/src/arrow/array/array_dict.h +++ b/cpp/src/arrow/array/array_dict.h @@ -133,7 +133,7 @@ class ARROW_EXPORT DictionaryUnifier { static Result> Make( std::shared_ptr value_type, MemoryPool* pool = default_memory_pool()); - /// \brief Unify dictionaries accross array chunks + /// \brief Unify dictionaries across array chunks /// /// The dictionaries in the array chunks will be unified, their indices /// accordingly transposed. @@ -144,7 +144,7 @@ class ARROW_EXPORT DictionaryUnifier { const std::shared_ptr& array, MemoryPool* pool = default_memory_pool()); - /// \brief Unify dictionaries accross the chunks of each table column + /// \brief Unify dictionaries across the chunks of each table column /// /// The dictionaries in each table column will be unified, their indices /// accordingly transposed. diff --git a/cpp/src/arrow/array/array_dict_test.cc b/cpp/src/arrow/array/array_dict_test.cc index 2f3ee6e2d49a5..25df875674b4c 100644 --- a/cpp/src/arrow/array/array_dict_test.cc +++ b/cpp/src/arrow/array/array_dict_test.cc @@ -1130,7 +1130,7 @@ TEST(TestDictionary, Validate) { ASSERT_RAISES(Invalid, arr->ValidateFull()); #if !defined(__APPLE__) - // GH-35712: ASSERT_DEATH would make testing slow on MacOS. + // GH-35712: ASSERT_DEATH would make testing slow on macOS. 
ASSERT_DEATH( { std::shared_ptr null_dict_arr = diff --git a/cpp/src/arrow/array/array_list_test.cc b/cpp/src/arrow/array/array_list_test.cc index 0b591d401804d..b08fa99168616 100644 --- a/cpp/src/arrow/array/array_list_test.cc +++ b/cpp/src/arrow/array/array_list_test.cc @@ -300,7 +300,7 @@ class TestListArray : public ::testing::Test { ASSERT_OK(result->ValidateFull()); AssertArraysEqual(*result, *expected); - // Offets without nulls, will replace null with empty list + // Offsets without nulls, will replace null with empty list ASSERT_OK_AND_ASSIGN(result, FromArrays(*offsets_wo_nulls, *sizes_wo_nulls, *values)); ASSERT_OK(result->ValidateFull()); AssertArraysEqual(*result, *std::dynamic_pointer_cast( diff --git a/cpp/src/arrow/array/array_nested.cc b/cpp/src/arrow/array/array_nested.cc index 03f3e5af29908..7868fc4f28837 100644 --- a/cpp/src/arrow/array/array_nested.cc +++ b/cpp/src/arrow/array/array_nested.cc @@ -393,7 +393,7 @@ Result> ListViewFromListImpl( const auto* offsets = list_data->template GetValues(1, 0); auto* sizes = sizes_buffer->mutable_data_as(); // Zero the initial padding area to avoid leaking any data when buffers are - // sent over IPC or throught the C Data interface. + // sent over IPC or through the C Data interface. memset(sizes, 0, list_data->offset * sizeof(offset_type)); for (int64_t i = list_data->offset; i < buffer_length; i++) { sizes[i] = offsets[i + 1] - offsets[i]; @@ -778,7 +778,7 @@ Result> MapArray::FromArraysInternal( } if (keys->null_count() != 0) { - return Status::Invalid("Map can not contain NULL valued keys"); + return Status::Invalid("Map cannot contain NULL valued keys"); } if (keys->length() != items->length()) { diff --git a/cpp/src/arrow/array/array_run_end.h b/cpp/src/arrow/array/array_run_end.h index 9770aa1fbbb1c..b46b0855ab367 100644 --- a/cpp/src/arrow/array/array_run_end.h +++ b/cpp/src/arrow/array/array_run_end.h @@ -59,7 +59,7 @@ class ARROW_EXPORT RunEndEncodedArray : public Array { /// /// The length and offset parameters refer to the dimensions of the logical /// array which is the array we would get after expanding all the runs into - /// repeated values. As such, length can be much greater than the lenght of + /// repeated values. As such, length can be much greater than the length of /// the child run_ends and values arrays. RunEndEncodedArray(const std::shared_ptr& type, int64_t length, const std::shared_ptr& run_ends, @@ -69,7 +69,7 @@ class ARROW_EXPORT RunEndEncodedArray : public Array { /// /// The length and offset parameters refer to the dimensions of the logical /// array which is the array we would get after expanding all the runs into - /// repeated values. As such, length can be much greater than the lenght of + /// repeated values. As such, length can be much greater than the length of /// the child run_ends and values arrays. static Result> Make( const std::shared_ptr& type, int64_t logical_length, @@ -122,7 +122,7 @@ class ARROW_EXPORT RunEndEncodedArray : public Array { /// run-ends) necessary to represent the logical range of values from offset /// to length. /// - /// Avoid calling this function if the physical length can be estabilished in + /// Avoid calling this function if the physical length can be established in /// some other way (e.g. when iterating over the runs sequentially until the /// end). This function uses binary-search, so it has a O(log N) cost. 
int64_t FindPhysicalLength() const; diff --git a/cpp/src/arrow/array/array_struct_test.cc b/cpp/src/arrow/array/array_struct_test.cc index 73d53a7efa59b..5505ec636c7f8 100644 --- a/cpp/src/arrow/array/array_struct_test.cc +++ b/cpp/src/arrow/array/array_struct_test.cc @@ -541,7 +541,7 @@ TEST_F(TestStructBuilder, TestEquality) { ASSERT_OK(char_vb->Reserve(list_values.size())); ASSERT_OK(int_vb->Reserve(int_values.size())); - // setup two equal arrays, one of which takes an unequal bitmap + // set up two equal arrays, one of which takes an unequal bitmap ASSERT_OK(builder_->AppendValues(struct_is_valid.size(), struct_is_valid.data())); ASSERT_OK(list_vb->AppendValues(list_offsets.data(), list_offsets.size(), list_is_valid.data())); @@ -574,7 +574,7 @@ TEST_F(TestStructBuilder, TestEquality) { ASSERT_OK(char_vb->Resize(list_values.size())); ASSERT_OK(int_vb->Resize(int_values.size())); - // setup an unequal one with the unequal bitmap + // set up an unequal one with the unequal bitmap ASSERT_OK(builder_->AppendValues(unequal_struct_is_valid.size(), unequal_struct_is_valid.data())); ASSERT_OK(list_vb->AppendValues(list_offsets.data(), list_offsets.size(), @@ -592,7 +592,7 @@ TEST_F(TestStructBuilder, TestEquality) { ASSERT_OK(char_vb->Resize(list_values.size())); ASSERT_OK(int_vb->Resize(int_values.size())); - // setup an unequal one with unequal offsets + // set up an unequal one with unequal offsets ASSERT_OK(builder_->AppendValues(struct_is_valid.size(), struct_is_valid.data())); ASSERT_OK(list_vb->AppendValues(unequal_list_offsets.data(), unequal_list_offsets.size(), @@ -610,7 +610,7 @@ TEST_F(TestStructBuilder, TestEquality) { ASSERT_OK(char_vb->Resize(list_values.size())); ASSERT_OK(int_vb->Resize(int_values.size())); - // setup anunequal one with unequal values + // set up an unequal one with unequal values ASSERT_OK(builder_->AppendValues(struct_is_valid.size(), struct_is_valid.data())); ASSERT_OK(list_vb->AppendValues(list_offsets.data(), list_offsets.size(), list_is_valid.data())); diff --git a/cpp/src/arrow/array/builder_base.h b/cpp/src/arrow/array/builder_base.h index 05af850fd149c..11036797e014f 100644 --- a/cpp/src/arrow/array/builder_base.h +++ b/cpp/src/arrow/array/builder_base.h @@ -349,7 +349,7 @@ inline Result> MakeBuilderExactIndex( } /// \brief Construct an empty DictionaryBuilder initialized optionally -/// with a pre-existing dictionary +/// with a preexisting dictionary /// \param[in] pool the MemoryPool to use for allocations /// \param[in] type the dictionary type to create the builder for /// \param[in] dictionary the initial dictionary, if any. 
May be nullptr diff --git a/cpp/src/arrow/array/builder_nested.h b/cpp/src/arrow/array/builder_nested.h index 21c2d4b270eb1..8065752f3e278 100644 --- a/cpp/src/arrow/array/builder_nested.h +++ b/cpp/src/arrow/array/builder_nested.h @@ -677,7 +677,7 @@ class ARROW_EXPORT FixedSizeListBuilder : public ArrayBuilder { /// \brief Vector append /// - /// If passed, valid_bytes wil be read and any zero byte + /// If passed, valid_bytes will be read and any zero byte /// will cause the corresponding slot to be null /// /// This function affects only the validity bitmap; the child values must be appended diff --git a/cpp/src/arrow/array/data.cc b/cpp/src/arrow/array/data.cc index c002c0817b194..8454ac8f1d5fb 100644 --- a/cpp/src/arrow/array/data.cc +++ b/cpp/src/arrow/array/data.cc @@ -221,7 +221,7 @@ void ArraySpan::SetMembers(const ArrayData& data) { this->null_count = 0; } - // Makes sure any other buffers are seen as null / non-existent + // Makes sure any other buffers are seen as null / nonexistent for (int i = static_cast(data.buffers.size()); i < 3; ++i) { this->buffers[i] = {}; } diff --git a/cpp/src/arrow/c/bridge_test.cc b/cpp/src/arrow/c/bridge_test.cc index 362df833781a1..01c56d44d8c64 100644 --- a/cpp/src/arrow/c/bridge_test.cc +++ b/cpp/src/arrow/c/bridge_test.cc @@ -3043,7 +3043,7 @@ TEST_F(TestArrayImport, RunEndEncodedWithOffset) { REEFromJSON(ree_type, "[-2.0, -2.0, -2.0, -2.0, 3.0, 3.0, 3.0]")); CheckImport(expected); - // Ofsset in parent + // Offset in parent FillPrimitive(AddChild(), 5, 0, 0, run_ends_buffers5); FillPrimitive(AddChild(), 5, 0, 0, primitive_buffers_no_nulls5); FillRunEndEncoded(5, 2); @@ -3295,7 +3295,7 @@ TEST_F(TestArrayImport, ListError) { } TEST_F(TestArrayImport, ListViewNoError) { - // Unlike with lists, importing a length-0 list-view with all buffers ommitted is + // Unlike with lists, importing a length-0 list-view with all buffers omitted is // not an error. List-views don't need an extra offset value, so an empty offsets // buffer is valid in this case. diff --git a/cpp/src/arrow/chunked_array_test.cc b/cpp/src/arrow/chunked_array_test.cc index 46dccaf3c6b86..6ca52ab46ca68 100644 --- a/cpp/src/arrow/chunked_array_test.cc +++ b/cpp/src/arrow/chunked_array_test.cc @@ -228,7 +228,7 @@ TEST_F(TestChunkedArray, Validate) { random::RandomArrayGenerator gen(0); - // Valid if non-empty and ommitted type + // Valid if non-empty and omitted type ArrayVector arrays = {gen.Int64(50, 0, 100, 0.1), gen.Int64(50, 0, 100, 0.1)}; auto chunks_with_no_type = std::make_shared(arrays, nullptr); ASSERT_OK(chunks_with_no_type->ValidateFull()); diff --git a/cpp/src/arrow/compute/api_aggregate.h b/cpp/src/arrow/compute/api_aggregate.h index 3493c3146310d..81e36f82e3afd 100644 --- a/cpp/src/arrow/compute/api_aggregate.h +++ b/cpp/src/arrow/compute/api_aggregate.h @@ -452,7 +452,7 @@ Result TDigest(const Datum& value, /// \brief Find the first index of a value in an array. /// /// \param[in] value The array to search. -/// \param[in] options The array to search for. See IndexOoptions. +/// \param[in] options The array to search for. See IndexOptions. /// \param[in] ctx the function execution context, optional /// \return out a Scalar containing the index (or -1 if not found). 
/// diff --git a/cpp/src/arrow/compute/api_scalar.h b/cpp/src/arrow/compute/api_scalar.h index 9f12471ddca14..8da3a9f5c5521 100644 --- a/cpp/src/arrow/compute/api_scalar.h +++ b/cpp/src/arrow/compute/api_scalar.h @@ -491,7 +491,7 @@ struct ARROW_EXPORT AssumeTimezoneOptions : public FunctionOptions { /// How to interpret ambiguous local times (due to DST shifts) Ambiguous ambiguous; - /// How to interpret non-existent local times (due to DST shifts) + /// How to interpret nonexistent local times (due to DST shifts) Nonexistent nonexistent; }; @@ -1589,7 +1589,7 @@ ARROW_EXPORT Result MonthsBetween(const Datum& left, const Datum& right, ARROW_EXPORT Result WeeksBetween(const Datum& left, const Datum& right, ExecContext* ctx = NULLPTR); -/// \brief Month Day Nano Between finds the number of months, days, and nonaseconds +/// \brief Month Day Nano Between finds the number of months, days, and nanoseconds /// between two values /// /// \param[in] left input treated as the start time diff --git a/cpp/src/arrow/compute/api_vector.h b/cpp/src/arrow/compute/api_vector.h index 0233090ef6fb9..a0b2cefe5efd0 100644 --- a/cpp/src/arrow/compute/api_vector.h +++ b/cpp/src/arrow/compute/api_vector.h @@ -402,7 +402,7 @@ Result> NthToIndices(const Array& values, int64_t n, /// \brief Return indices that partition an array around n-th sorted element. /// -/// This overload takes a PartitionNthOptions specifiying the pivot index +/// This overload takes a PartitionNthOptions specifying the pivot index /// and the null handling. /// /// \param[in] values array to be partitioned @@ -453,7 +453,7 @@ Result> SortIndices(const Array& array, /// \brief Return the indices that would sort an array. /// -/// This overload takes a ArraySortOptions specifiying the sort order +/// This overload takes an ArraySortOptions specifying the sort order /// and the null handling. /// /// \param[in] array array to sort @@ -487,7 +487,7 @@ Result> SortIndices(const ChunkedArray& chunked_array, /// \brief Return the indices that would sort a chunked array. /// -/// This overload takes a ArraySortOptions specifiying the sort order +/// This overload takes an ArraySortOptions specifying the sort order /// and the null handling. /// /// \param[in] chunked_array chunked array to sort diff --git a/cpp/src/arrow/compute/exec.cc b/cpp/src/arrow/compute/exec.cc index c18dfa0952245..28dcf493fa294 100644 --- a/cpp/src/arrow/compute/exec.cc +++ b/cpp/src/arrow/compute/exec.cc @@ -1164,7 +1164,7 @@ class ScalarAggExecutor : public KernelExecutorImpl { // TODO(wesm): this is odd and should be examined soon -- only one state // "should" be needed per thread of execution - // FIXME(ARROW-11840) don't merge *any* aggegates for every batch + // FIXME(ARROW-11840) don't merge *any* aggregates for every batch ARROW_ASSIGN_OR_RAISE(auto batch_state, kernel_->init(kernel_ctx_, {kernel_, *input_types_, options_})); diff --git a/cpp/src/arrow/compute/exec_internal.h b/cpp/src/arrow/compute/exec_internal.h index 8beff2a6c63ac..7e4f364a9288e 100644 --- a/cpp/src/arrow/compute/exec_internal.h +++ b/cpp/src/arrow/compute/exec_internal.h @@ -46,7 +46,7 @@ class ARROW_EXPORT ExecSpanIterator { public: ExecSpanIterator() = default; - /// \brief Initialize itertor iterator and do basic argument validation + /// \brief Initialize iterator and do basic argument validation /// /// \param[in] batch the input ExecBatch /// \param[in] max_chunksize the maximum length of each ExecSpan.
Depending diff --git a/cpp/src/arrow/compute/exec_test.cc b/cpp/src/arrow/compute/exec_test.cc index d661e5735fea6..cfce0c57fa416 100644 --- a/cpp/src/arrow/compute/exec_test.cc +++ b/cpp/src/arrow/compute/exec_test.cc @@ -1232,7 +1232,7 @@ void TestCallScalarFunctionPreallocationCases::DoTest(FunctionCallerMaker caller } // Set the exec_chunksize to be smaller, so now we have several invocations - // of the kernel, but still the output is onee array + // of the kernel, but still the output is one array { std::vector args = {Datum(arr)}; exec_ctx_->set_exec_chunksize(80); diff --git a/cpp/src/arrow/compute/kernels/aggregate_basic.cc b/cpp/src/arrow/compute/kernels/aggregate_basic.cc index ddd241652460e..1fbcd6a249093 100644 --- a/cpp/src/arrow/compute/kernels/aggregate_basic.cc +++ b/cpp/src/arrow/compute/kernels/aggregate_basic.cc @@ -1100,7 +1100,7 @@ void RegisterScalarAggregateBasic(FunctionRegistry* registry) { AddFirstLastKernels(FirstLastInit, TemporalTypes(), func.get()); DCHECK_OK(registry->AddFunction(std::move(func))); - // Add first/last as convience functions + // Add first/last as convenience functions func = std::make_shared("first", Arity::Unary(), first_doc, &default_scalar_aggregate_options); AddFirstOrLastAggKernel(func.get(), first_last_func); diff --git a/cpp/src/arrow/compute/kernels/aggregate_basic_internal.h b/cpp/src/arrow/compute/kernels/aggregate_basic_internal.h index 4966e9871d62c..f08e7aaa538bb 100644 --- a/cpp/src/arrow/compute/kernels/aggregate_basic_internal.h +++ b/cpp/src/arrow/compute/kernels/aggregate_basic_internal.h @@ -472,7 +472,7 @@ struct FirstLastImpl : public ScalarAggregator { this->count += arr.length() - null_count; if (null_count == 0) { - // If there are no null valus, we can just merge + // If there are no null values, we can just merge // the first and last element this->state.MergeOne(arr.GetView(0)); this->state.MergeOne(arr.GetView(arr.length() - 1)); diff --git a/cpp/src/arrow/compute/kernels/aggregate_mode.cc b/cpp/src/arrow/compute/kernels/aggregate_mode.cc index 7f359ead6cb83..63a8a0462c15f 100644 --- a/cpp/src/arrow/compute/kernels/aggregate_mode.cc +++ b/cpp/src/arrow/compute/kernels/aggregate_mode.cc @@ -72,7 +72,7 @@ Result> PrepareOutput(int64_t n, KernelContext* ctx, } // find top-n value:count pairs with minimal heap -// suboptimal for tiny or large n, possibly okay as we're not in hot path +// suboptimal for tiny or large n, possibly okay as we aren't in hot path template Status Finalize(KernelContext* ctx, const DataType& type, ExecResult* out, Generator&& gen) { @@ -115,7 +115,7 @@ Status Finalize(KernelContext* ctx, const DataType& type, ExecResult* out, return Status::OK(); } -// count value occurances for integers with narrow value range +// count value occurrences for integers with narrow value range // O(1) space, O(n) time template struct CountModer { diff --git a/cpp/src/arrow/compute/kernels/aggregate_quantile.cc b/cpp/src/arrow/compute/kernels/aggregate_quantile.cc index e675a1cec86c9..f4826229dd46c 100644 --- a/cpp/src/arrow/compute/kernels/aggregate_quantile.cc +++ b/cpp/src/arrow/compute/kernels/aggregate_quantile.cc @@ -120,7 +120,7 @@ struct SortQuantiler { }); // input array is partitioned around data point at `last_index` (pivot) - // for next quatile which is smaller, we only consider inputs left of the pivot + // for next quantile which is smaller, we only consider inputs left of the pivot uint64_t last_index = in_buffer.size(); if (is_datapoint) { CType* out_buffer = out_data->template 
GetMutableValues(1); diff --git a/cpp/src/arrow/compute/kernels/aggregate_test.cc b/cpp/src/arrow/compute/kernels/aggregate_test.cc index aa19fb3401232..65439af2748b5 100644 --- a/cpp/src/arrow/compute/kernels/aggregate_test.cc +++ b/cpp/src/arrow/compute/kernels/aggregate_test.cc @@ -478,7 +478,7 @@ TEST_F(TestSumKernelRoundOff, Basics) { // array = np.arange(321000, dtype='float64') // array -= np.mean(array) - // array *= arrray + // array *= array double index = 0; ASSERT_OK_AND_ASSIGN( auto array, ArrayFromBuilderVisitor( @@ -3653,7 +3653,7 @@ class TestPrimitiveQuantileKernel : public ::testing::Test { #define INTYPE(x) Datum(static_cast(x)) #define DOUBLE(x) Datum(static_cast(x)) -// output type per interplation: linear, lower, higher, nearest, midpoint +// output type per interpolation: linear, lower, higher, nearest, midpoint #define O(a, b, c, d, e) \ { DOUBLE(a), INTYPE(b), INTYPE(c), INTYPE(d), DOUBLE(e) } diff --git a/cpp/src/arrow/compute/kernels/hash_aggregate.cc b/cpp/src/arrow/compute/kernels/hash_aggregate.cc index 47cae538e2e3f..c37e45513d040 100644 --- a/cpp/src/arrow/compute/kernels/hash_aggregate.cc +++ b/cpp/src/arrow/compute/kernels/hash_aggregate.cc @@ -1848,8 +1848,8 @@ struct GroupedFirstLastImpl final : public GroupedAggregator { const ArrayData& group_id_mapping) override { // The merge is asymmetric. "first" from this state gets pick over "first" from other // state. "last" from other state gets pick over from this state. This is so that when - // using with segmeneted aggregation, we still get the correct "first" and "last" - // value for the entire segement. + // using with segmented aggregation, we still get the correct "first" and "last" + // value for the entire segment. auto other = checked_cast(&raw_other); auto raw_firsts = firsts_.mutable_data(); diff --git a/cpp/src/arrow/compute/kernels/scalar_arithmetic_benchmark.cc b/cpp/src/arrow/compute/kernels/scalar_arithmetic_benchmark.cc index 4b678da5f1b42..17e9951d69bc2 100644 --- a/cpp/src/arrow/compute/kernels/scalar_arithmetic_benchmark.cc +++ b/cpp/src/arrow/compute/kernels/scalar_arithmetic_benchmark.cc @@ -33,7 +33,7 @@ constexpr auto kSeed = 0x94378165; using BinaryOp = Result(const Datum&, const Datum&, ArithmeticOptions, ExecContext*); -// Add explicit overflow-checked shortcuts, for easy benchmark parametering. +// Add explicit overflow-checked shortcuts, for easy benchmark parameterizing. 
static Result AddChecked(const Datum& left, const Datum& right, ArithmeticOptions options = ArithmeticOptions(), ExecContext* ctx = NULLPTR) { diff --git a/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc b/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc index 756b3028c4a59..37a1bcbc02d73 100644 --- a/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc @@ -1857,7 +1857,7 @@ TEST_F(TestBinaryArithmeticDecimal, DispatchBest) { } } -// reference result from bc (precsion=100, scale=40) +// reference result from bc (precision=100, scale=40) TEST_F(TestBinaryArithmeticDecimal, AddSubtract) { // array array, decimal128 { diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc index c84125bbdd19e..09520e98bcdac 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc @@ -2826,19 +2826,19 @@ TEST(Cast, StructToDifferentNullabilityStruct) { ::testing::HasSubstr("cannot cast nullable field to non-nullable field"), Cast(src_nullable, options1_non_nullable)); - std::vector> fields_dest2_non_nullble = { + std::vector> fields_dest2_non_nullable = { std::make_shared("a", int64(), false), std::make_shared("c", int64(), false)}; - const auto dest2_non_nullable = arrow::struct_(fields_dest2_non_nullble); + const auto dest2_non_nullable = arrow::struct_(fields_dest2_non_nullable); const auto options2_non_nullable = CastOptions::Safe(dest2_non_nullable); EXPECT_RAISES_WITH_MESSAGE_THAT( TypeError, ::testing::HasSubstr("cannot cast nullable field to non-nullable field"), Cast(src_nullable, options2_non_nullable)); - std::vector> fields_dest3_non_nullble = { + std::vector> fields_dest3_non_nullable = { std::make_shared("c", int64(), false)}; - const auto dest3_non_nullable = arrow::struct_(fields_dest3_non_nullble); + const auto dest3_non_nullable = arrow::struct_(fields_dest3_non_nullable); const auto options3_non_nullable = CastOptions::Safe(dest3_non_nullable); EXPECT_RAISES_WITH_MESSAGE_THAT( TypeError, diff --git a/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc b/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc index a11aab81742ed..2deb467e4340d 100644 --- a/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc @@ -69,7 +69,7 @@ template class TestIfElsePrimitive : public ::testing::Test {}; // There are a lot of tests here if we cover all the types and it gets slow on valgrind -// so we overrdie the standard type sets with a smaller range +// so we override the standard type sets with a smaller range #ifdef ARROW_VALGRIND using IfElseNumericBasedTypes = ::testing::Types> { if (has_halfway_point && (remainder == half_multiple || remainder == neg_half_multiple)) { // On the halfway point, use tiebreaker - // Manually implement rounding since we're not actually rounding a + // Manually implement rounding since we aren't actually rounding a // decimal value, but rather manipulating the multiple switch (kRoundMode) { case RoundMode::HALF_DOWN: @@ -638,7 +638,7 @@ struct RoundToMultiple> { } } } else { - // Manually implement rounding since we're not actually rounding a + // Manually implement rounding since we aren't actually rounding a // decimal value, but rather manipulating the multiple switch (kRoundMode) { case RoundMode::DOWN: @@ -747,7 +747,7 @@ struct Round { } else { round_val = RoundImpl::Round(round_val); } - // Equality check is 
ommitted so that the common case of 10^0 (integer rounding) + // Equality check is omitted so that the common case of 10^0 (integer rounding) // uses multiply-only round_val = ndigits > 0 ? (round_val / pow10) : (round_val * pow10); if (!std::isfinite(round_val)) { diff --git a/cpp/src/arrow/compute/kernels/scalar_string_internal.h b/cpp/src/arrow/compute/kernels/scalar_string_internal.h index 1a9969441655d..7a5d5a7c86e85 100644 --- a/cpp/src/arrow/compute/kernels/scalar_string_internal.h +++ b/cpp/src/arrow/compute/kernels/scalar_string_internal.h @@ -306,7 +306,7 @@ struct StringSplitExec { using ListOffsetsBuilderType = TypedBufferBuilder; using State = OptionsWrapper; - // Keep the temporary storage accross individual values, to minimize reallocations + // Keep the temporary storage across individual values, to minimize reallocations std::vector parts; Options options; diff --git a/cpp/src/arrow/compute/kernels/scalar_string_test.cc b/cpp/src/arrow/compute/kernels/scalar_string_test.cc index ff14f5e7a5c5d..5dec16d89e29c 100644 --- a/cpp/src/arrow/compute/kernels/scalar_string_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_string_test.cc @@ -2060,7 +2060,7 @@ TYPED_TEST(TestStringKernels, SliceCodeunitsBasic) { this->CheckUnary("utf8_slice_codeunits", R"(["𝑓öõḍš"])", this->type(), R"([""])", &options_edgecase_1); - // this is a safeguard agains an optimization path possible, but actually a tricky case + // this is a safeguard against an optimization path possible, but actually a tricky case SliceOptions options_edgecase_2{-6, -2}; this->CheckUnary("utf8_slice_codeunits", R"(["𝑓öõḍš"])", this->type(), R"(["𝑓öõ"])", &options_edgecase_2); @@ -2189,7 +2189,7 @@ TYPED_TEST(TestBinaryKernels, SliceBytesBasic) { "ds\"]", this->type(), R"([""])", &options_edgecase_1); - // this is a safeguard agains an optimization path possible, but actually a tricky case + // this is a safeguard against an optimization path possible, but actually a tricky case SliceOptions options_edgecase_2{-6, -2}; this->CheckUnary("binary_slice", "[\"f\xc2\xa2" diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc index 4c7975add0308..66668ef81869c 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc @@ -2101,9 +2101,9 @@ TEST_F(ScalarTemporalTest, StrftimeNoTimezone) { TEST_F(ScalarTemporalTest, StrftimeInvalidTimezone) { const char* seconds = R"(["1970-01-01T00:00:59", null])"; - auto arr = ArrayFromJSON(timestamp(TimeUnit::SECOND, "non-existent"), seconds); + auto arr = ArrayFromJSON(timestamp(TimeUnit::SECOND, "nonexistent"), seconds); EXPECT_RAISES_WITH_MESSAGE_THAT( - Invalid, testing::HasSubstr("Cannot locate timezone 'non-existent'"), + Invalid, testing::HasSubstr("Cannot locate timezone 'nonexistent'"), Strftime(arr, StrftimeOptions())); } @@ -2159,12 +2159,12 @@ TEST_F(ScalarTemporalTest, StrftimeOtherLocale) { } TEST_F(ScalarTemporalTest, StrftimeInvalidLocale) { - auto options = StrftimeOptions("%d %B %Y %H:%M:%S", "non-existent"); + auto options = StrftimeOptions("%d %B %Y %H:%M:%S", "nonexistent"); const char* seconds = R"(["1970-01-01T00:00:59", null])"; auto arr = ArrayFromJSON(timestamp(TimeUnit::SECOND, "UTC"), seconds); EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid, - testing::HasSubstr("Cannot find locale 'non-existent'"), + testing::HasSubstr("Cannot find locale 'nonexistent'"), Strftime(arr, options)); } @@ -2601,7 +2601,7 @@ TEST_F(ScalarTemporalTestStrictCeil, 
TestCeilTemporalStrictCeil) { TEST_F(ScalarTemporalTestMultipleSinceGreaterUnit, CeilUTC) { std::string op = "ceil_temporal"; - // Data for tests below was generaed via lubridate with the exception + // Data for tests below was generated via lubridate with the exception // of week data because lubridate currently does not support rounding to // multiple of week. const char* ceil_15_nanosecond = @@ -2989,7 +2989,7 @@ TEST_F(ScalarTemporalTest, TestFloorTemporal) { TEST_F(ScalarTemporalTestMultipleSinceGreaterUnit, FloorUTC) { std::string op = "floor_temporal"; - // Data for tests below was generaed via lubridate with the exception + // Data for tests below was generated via lubridate with the exception // of week data because lubridate currently does not support rounding to // multiple of week. const char* floor_15_nanosecond = @@ -3402,7 +3402,7 @@ TEST_F(ScalarTemporalTest, TestCeilFloorRoundTemporalBrussels) { TEST_F(ScalarTemporalTestMultipleSinceGreaterUnit, RoundUTC) { std::string op = "round_temporal"; - // Data for tests below was generaed via lubridate with the exception + // Data for tests below was generated via lubridate with the exception // of week data because lubridate currently does not support rounding to // multiple of week. const char* round_15_nanosecond = diff --git a/cpp/src/arrow/compute/kernels/vector_run_end_encode.cc b/cpp/src/arrow/compute/kernels/vector_run_end_encode.cc index 943fdcd6b147f..811ed23e1134b 100644 --- a/cpp/src/arrow/compute/kernels/vector_run_end_encode.cc +++ b/cpp/src/arrow/compute/kernels/vector_run_end_encode.cc @@ -30,11 +30,11 @@ namespace compute { namespace internal { namespace { -struct RunEndEncondingState : public KernelState { - explicit RunEndEncondingState(std::shared_ptr run_end_type) +struct RunEndEncodingState : public KernelState { + explicit RunEndEncodingState(std::shared_ptr run_end_type) : run_end_type{std::move(run_end_type)} {} - ~RunEndEncondingState() override = default; + ~RunEndEncodingState() override = default; std::shared_ptr run_end_type; }; @@ -273,7 +273,7 @@ struct RunEndEncodeExec { template static Status Exec(KernelContext* ctx, const ExecSpan& span, ExecResult* result) { - auto state = checked_cast<RunEndEncondingState*>(ctx->state()); + auto state = checked_cast<RunEndEncodingState*>(ctx->state()); switch (state->run_end_type->id()) { case Type::INT16: return DoExec(ctx, span, result); @@ -290,7 +290,7 @@ struct RunEndEncodeExec { /// \brief The OutputType::Resolver of the "run_end_decode" function. static Result ResolveOutputType( KernelContext* ctx, const std::vector& input_types) { - auto state = checked_cast<RunEndEncondingState*>(ctx->state()); + auto state = checked_cast<RunEndEncodingState*>(ctx->state()); return TypeHolder(std::make_shared(state->run_end_type, input_types[0].GetSharedPtr())); } @@ -301,7 +301,7 @@ Result> RunEndEncodeInit(KernelContext*, auto* options = checked_cast(args.options); auto run_end_type = options ?
options->run_end_type : RunEndEncodeOptions::Defaults().run_end_type; - return std::make_unique<RunEndEncondingState>(std::move(run_end_type)); + return std::make_unique<RunEndEncodingState>(std::move(run_end_type)); } template diff --git a/cpp/src/arrow/compute/kernels/vector_select_k.cc b/cpp/src/arrow/compute/kernels/vector_select_k.cc index 5000de8996280..14217afb6f54d 100644 --- a/cpp/src/arrow/compute/kernels/vector_select_k.cc +++ b/cpp/src/arrow/compute/kernels/vector_select_k.cc @@ -71,9 +71,9 @@ class SelectKComparator { } }; -class ArraySelecter : public TypeVisitor { public: - ArraySelecter(ExecContext* ctx, const Array& array, const SelectKOptions& options, + ArraySelector(ExecContext* ctx, const Array& array, const SelectKOptions& options, Datum* output) : TypeVisitor(), ctx_(ctx), @@ -163,9 +163,9 @@ struct TypedHeapItem { ArrayType* array; }; -class ChunkedArraySelecter : public TypeVisitor { public: - ChunkedArraySelecter(ExecContext* ctx, const ChunkedArray& chunked_array, + ChunkedArraySelector(ExecContext* ctx, const ChunkedArray& chunked_array, const SelectKOptions& options, Datum* output) : TypeVisitor(), chunked_array_(chunked_array), @@ -272,13 +272,13 @@ class ChunkedArraySelecter : public TypeVisitor { Datum* output_; }; -class RecordBatchSelecter : public TypeVisitor { private: using ResolvedSortKey = ResolvedRecordBatchSortKey; using Comparator = MultipleKeyComparator; public: - RecordBatchSelecter(ExecContext* ctx, const RecordBatch& record_batch, + RecordBatchSelector(ExecContext* ctx, const RecordBatch& record_batch, const SelectKOptions& options, Datum* output) : TypeVisitor(), ctx_(ctx), @@ -390,7 +390,7 @@ class RecordBatchSelecter : public TypeVisitor { Comparator comparator_; }; -class TableSelecter : public TypeVisitor { private: struct ResolvedSortKey { ResolvedSortKey(const std::shared_ptr& chunked_array, @@ -419,7 +419,7 @@ class TableSelecter : public TypeVisitor { using Comparator = MultipleKeyComparator; public: - TableSelecter(ExecContext* ctx, const Table& table, const SelectKOptions& options, + TableSelector(ExecContext* ctx, const Table& table, const SelectKOptions& options, Datum* output) : TypeVisitor(), ctx_(ctx), @@ -609,32 +609,32 @@ class SelectKUnstableMetaFunction : public MetaFunction { Result SelectKth(const Array& array, const SelectKOptions& options, ExecContext* ctx) const { Datum output; - ArraySelecter selecter(ctx, array, options, &output); - ARROW_RETURN_NOT_OK(selecter.Run()); + ArraySelector selector(ctx, array, options, &output); + ARROW_RETURN_NOT_OK(selector.Run()); return output; } Result SelectKth(const ChunkedArray& chunked_array, const SelectKOptions& options, ExecContext* ctx) const { Datum output; - ChunkedArraySelecter selecter(ctx, chunked_array, options, &output); - ARROW_RETURN_NOT_OK(selecter.Run()); + ChunkedArraySelector selector(ctx, chunked_array, options, &output); + ARROW_RETURN_NOT_OK(selector.Run()); return output; } Result SelectKth(const RecordBatch& record_batch, const SelectKOptions& options, ExecContext* ctx) const { ARROW_RETURN_NOT_OK(CheckConsistency(*record_batch.schema(), options.sort_keys)); Datum output; - RecordBatchSelecter selecter(ctx, record_batch, options, &output); - ARROW_RETURN_NOT_OK(selecter.Run()); + RecordBatchSelector selector(ctx, record_batch, options, &output); + ARROW_RETURN_NOT_OK(selector.Run()); return output; } Result
SelectKth(const Table& table, const SelectKOptions& options, ExecContext* ctx) const { ARROW_RETURN_NOT_OK(CheckConsistency(*table.schema(), options.sort_keys)); Datum output; - TableSelecter selecter(ctx, table, options, &output); - ARROW_RETURN_NOT_OK(selecter.Run()); + TableSelector selector(ctx, table, options, &output); + ARROW_RETURN_NOT_OK(selector.Run()); return output; } }; diff --git a/cpp/src/arrow/compute/kernels/vector_selection_filter_internal.cc b/cpp/src/arrow/compute/kernels/vector_selection_filter_internal.cc index 4a5e579fb155e..a25b04ae4fa65 100644 --- a/cpp/src/arrow/compute/kernels/vector_selection_filter_internal.cc +++ b/cpp/src/arrow/compute/kernels/vector_selection_filter_internal.cc @@ -171,7 +171,7 @@ class PrimitiveFilterImpl { } if (out_arr->buffers[0] != nullptr) { - // May not be allocated if neither filter nor values contains nulls + // May be unallocated if neither filter nor values contain nulls out_is_valid_ = out_arr->buffers[0]->mutable_data(); } out_data_ = reinterpret_cast(out_arr->buffers[1]->mutable_data()); diff --git a/cpp/src/arrow/compute/kernels/vector_selection_test.cc b/cpp/src/arrow/compute/kernels/vector_selection_test.cc index 30e85c1f71089..bdf9f5454fdef 100644 --- a/cpp/src/arrow/compute/kernels/vector_selection_test.cc +++ b/cpp/src/arrow/compute/kernels/vector_selection_test.cc @@ -2488,7 +2488,7 @@ TEST(TestIndicesNonZero, IndicesNonZeroBoolean) { Datum actual; std::shared_ptr result; - // boool + // bool ASSERT_OK_AND_ASSIGN( actual, CallFunction("indices_nonzero", {ArrayFromJSON(boolean(), "[null, true, false, true]")})); diff --git a/cpp/src/arrow/compute/key_map.cc b/cpp/src/arrow/compute/key_map.cc index 525dae850f19b..a027ec811cf24 100644 --- a/cpp/src/arrow/compute/key_map.cc +++ b/cpp/src/arrow/compute/key_map.cc @@ -505,7 +505,7 @@ void SwissTable::find(const int num_keys, const uint32_t* hashes, // Slow processing of input keys in the most generic case. // Handles inserting new keys. -// Pre-existing keys will be handled correctly, although the intended use is for this +// Preexisting keys will be handled correctly, although the intended use is for this // call to follow a call to find() method, which would only pass on new keys that were // not present in the hash table. // @@ -617,7 +617,7 @@ Status SwissTable::map_new_keys(uint32_t num_ids, uint16_t* ids, const uint32_t* ARROW_DCHECK(static_cast(num_ids) <= (1 << log_minibatch_)); ARROW_DCHECK(static_cast(max_id + 1) <= (1 << log_minibatch_)); - // Allocate temporary buffers for slot ids and intialize them + // Allocate temporary buffers for slot ids and initialize them auto slot_ids_buf = util::TempVectorHolder(temp_stack, max_id + 1); uint32_t* slot_ids = slot_ids_buf.mutable_data(); init_slot_ids_for_new_keys(num_ids, ids, hashes, slot_ids); diff --git a/cpp/src/arrow/compute/key_map.h b/cpp/src/arrow/compute/key_map.h index 85ef9029d6fc9..8e06dc83483aa 100644 --- a/cpp/src/arrow/compute/key_map.h +++ b/cpp/src/arrow/compute/key_map.h @@ -142,7 +142,7 @@ class ARROW_EXPORT SwissTable { void extract_group_ids_imp(const int num_keys, const uint16_t* selection, const uint32_t* hashes, const uint8_t* local_slots, uint32_t* out_group_ids, int elements_offset, - int element_mutltiplier) const; + int element_multiplier) const; inline uint64_t next_slot_to_visit(uint64_t block_index, int slot, int match_found) const; @@ -187,7 +187,7 @@ class ARROW_EXPORT SwissTable { // Slow processing of input keys in the most generic case. // Handles inserting new keys. 
- // Pre-existing keys will be handled correctly, although the intended use is for this + // Preexisting keys will be handled correctly, although the intended use is for this // call to follow a call to find() method, which would only pass on new keys that were // not present in the hash table. // diff --git a/cpp/src/arrow/compute/key_map_avx2.cc b/cpp/src/arrow/compute/key_map_avx2.cc index 731553511044f..3526a6cb0f344 100644 --- a/cpp/src/arrow/compute/key_map_avx2.cc +++ b/cpp/src/arrow/compute/key_map_avx2.cc @@ -117,7 +117,7 @@ int SwissTable::early_filter_imp_avx2_x8(const int num_hashes, const uint32_t* h vlocal_slot = _mm256_add_epi32(_mm256_and_si256(vlocal_slot, _mm256_set1_epi32(0xff)), _mm256_and_si256(vgt, _mm256_set1_epi32(4))); - // Convert slot id relative to the block to slot id relative to the beginnning of the + // Convert slot id relative to the block to slot id relative to the beginning of the // table // uint64_t local_slot = _mm256_extract_epi64( diff --git a/cpp/src/arrow/compute/light_array.cc b/cpp/src/arrow/compute/light_array.cc index 4e8b2b2d7cc3a..ee6e88cc0b44c 100644 --- a/cpp/src/arrow/compute/light_array.cc +++ b/cpp/src/arrow/compute/light_array.cc @@ -89,7 +89,7 @@ KeyColumnArray KeyColumnArray::Slice(int64_t offset, int64_t length) const { sliced.bit_offset_[0] = (bit_offset_[0] + offset) % 8; if (metadata_.fixed_length == 0 && !metadata_.is_null_type) { - ARROW_DCHECK(is_bool_type()) << "Expected BOOL type type but got a different type."; + ARROW_DCHECK(is_bool_type()) << "Expected BOOL type but got a different type."; sliced.buffers_[1] = buffers_[1] ? buffers_[1] + (bit_offset_[1] + offset) / 8 : nullptr; sliced.mutable_buffers_[1] = mutable_buffers_[1] diff --git a/cpp/src/arrow/compute/light_array_test.cc b/cpp/src/arrow/compute/light_array_test.cc index 4e33f7b578ea8..c024c1440d53c 100644 --- a/cpp/src/arrow/compute/light_array_test.cc +++ b/cpp/src/arrow/compute/light_array_test.cc @@ -333,7 +333,7 @@ TEST(ResizableArrayData, Binary) { ASSERT_EQ(0, array.num_rows()); ASSERT_OK(array.ResizeFixedLengthBuffers(2)); ASSERT_EQ(2, array.num_rows()); - // At this point the offets memory has been allocated and needs to be filled + // At this point the offsets memory has been allocated and needs to be filled // in before we allocate the variable length memory int offsets_width = static_cast(arrow::internal::checked_pointer_cast(type) diff --git a/cpp/src/arrow/compute/ordering.h b/cpp/src/arrow/compute/ordering.h index e581269cc20dd..61caa2b570dd3 100644 --- a/cpp/src/arrow/compute/ordering.h +++ b/cpp/src/arrow/compute/ordering.h @@ -52,7 +52,7 @@ class ARROW_EXPORT SortKey : public util::EqualityComparable { bool Equals(const SortKey& other) const; std::string ToString() const; - /// A FieldRef targetting the sort column. + /// A FieldRef targeting the sort column. FieldRef target; /// How to order by this sort key. 
SortOrder order; diff --git a/cpp/src/arrow/compute/registry_test.cc b/cpp/src/arrow/compute/registry_test.cc index 7fee136de7a0b..fab50d4f16e8d 100644 --- a/cpp/src/arrow/compute/registry_test.cc +++ b/cpp/src/arrow/compute/registry_test.cc @@ -68,7 +68,7 @@ TEST_P(TestRegistry, Basics) { ASSERT_OK_AND_ASSIGN(std::shared_ptr f1, registry_->GetFunction("f1")); ASSERT_EQ("f1", f1->name()); - // Non-existent function + // Nonexistent function ASSERT_RAISES(KeyError, registry_->GetFunction("f2")); // Try adding a function with name collision diff --git a/cpp/src/arrow/compute/row/grouper.cc b/cpp/src/arrow/compute/row/grouper.cc index b3d28ef19a1a0..5e23eda16fda2 100644 --- a/cpp/src/arrow/compute/row/grouper.cc +++ b/cpp/src/arrow/compute/row/grouper.cc @@ -210,7 +210,7 @@ struct SimpleKeySegmenter : public BaseRowSegmenter { private: TypeHolder key_type_; - std::vector save_key_data_; // previusly seen segment-key grouping data + std::vector save_key_data_; // previously seen segment-key grouping data bool extend_was_called_; }; diff --git a/cpp/src/arrow/compute/row/grouper.h b/cpp/src/arrow/compute/row/grouper.h index 15f00eaac2191..628a9c14f3e44 100644 --- a/cpp/src/arrow/compute/row/grouper.h +++ b/cpp/src/arrow/compute/row/grouper.h @@ -29,12 +29,12 @@ namespace arrow { namespace compute { /// \brief A segment -/// A segment group is a chunk of continous rows that have the same segment key. (For +/// A segment group is a chunk of continuous rows that have the same segment key. (For /// example, in ordered time series processing, segment key can be "date", and a segment /// group can be all the rows that belong to the same date.) A segment group can span -/// across multiple exec batches. A segment is a chunk of continous rows that has the same -/// segment key within a given batch. When a segment group span cross batches, it will -/// have multiple segments. A segment never spans cross batches. The segment data +/// across multiple exec batches. A segment is a chunk of continuous rows that has the +/// same segment key within a given batch. When a segment group spans across batches, it +/// will have multiple segments. A segment never spans across batches. The segment data /// structure only makes sense when used along with a exec batch. struct ARROW_EXPORT Segment { /// \brief the offset into the batch where the segment starts @@ -92,7 +92,7 @@ class ARROW_EXPORT RowSegmenter { /// \brief Reset this segmenter /// /// A segmenter normally extends (see `Segment`) a segment from one batch to the next. - /// If segment-extenion is undesirable, for example when each batch is processed + /// If segment-extension is undesirable, for example when each batch is processed /// independently, then `Reset` should be invoked before processing the next batch. virtual Status Reset() = 0; diff --git a/cpp/src/arrow/csv/lexing_internal.h b/cpp/src/arrow/csv/lexing_internal.h index 357c5716d5115..b1da12750ac58 100644 --- a/cpp/src/arrow/csv/lexing_internal.h +++ b/cpp/src/arrow/csv/lexing_internal.h @@ -71,7 +71,7 @@ class BaseBloomFilter { // For example 'b' (ASCII value 98) will set/test bit #34 in the filter. // If the bit is set in the filter, the given character *may* be part // of the matched characters. If the bit is unset in the filter, - // the the given character *cannot* be part of the matched characters. + // the given character *cannot* be part of the matched characters.
FilterType CharFilter(uint8_t c) const { return static_cast(1) << (c & kCharMask); } diff --git a/cpp/src/arrow/csv/reader.cc b/cpp/src/arrow/csv/reader.cc index 30fc0bc6aca44..332fad054fea3 100644 --- a/cpp/src/arrow/csv/reader.cc +++ b/cpp/src/arrow/csv/reader.cc @@ -389,7 +389,7 @@ namespace { // The parsed batch contains a list of offsets for each of the columns so that columns // can be individually scanned // -// This operator is not re-entrant +// This operator is not reentrant class BlockParsingOperator { public: BlockParsingOperator(io::IOContext io_context, ParseOptions parse_options, diff --git a/cpp/src/arrow/csv/writer_benchmark.cc b/cpp/src/arrow/csv/writer_benchmark.cc index 9bbba7ebd7e9f..54c0f50613754 100644 --- a/cpp/src/arrow/csv/writer_benchmark.cc +++ b/cpp/src/arrow/csv/writer_benchmark.cc @@ -109,7 +109,7 @@ void BenchmarkWriteCsv(benchmark::State& state, const WriteOptions& options, state.counters["null_percent"] = static_cast(state.range(0)); } -// Exercies UnQuotedColumnPopulator with integer +// Exercises UnQuotedColumnPopulator with integer void WriteCsvNumeric(benchmark::State& state) { auto batch = MakeIntTestBatch(kCsvRows, kCsvCols, state.range(0)); BenchmarkWriteCsv(state, WriteOptions::Defaults(), *batch); diff --git a/cpp/src/arrow/dataset/dataset.h b/cpp/src/arrow/dataset/dataset.h index 39936fbd7b5b2..1cdd92d5c42f2 100644 --- a/cpp/src/arrow/dataset/dataset.h +++ b/cpp/src/arrow/dataset/dataset.h @@ -398,7 +398,7 @@ class ARROW_DS_EXPORT Dataset : public std::enable_shared_from_this { /// /// Currently, `executor` is always the same as `internal::GetCPUThreadPool()`, /// which means the results from the underlying fragment generator will be - /// transfered to the default CPU thread pool. The generator itself is + /// transferred to the default CPU thread pool. The generator itself is /// offloaded to run on the default IO thread pool. virtual Result GetFragmentsAsyncImpl( compute::Expression predicate, arrow::internal::Executor* executor); diff --git a/cpp/src/arrow/dataset/file_json.cc b/cpp/src/arrow/dataset/file_json.cc index 6ca8405f03e2c..1d545c3969f6a 100644 --- a/cpp/src/arrow/dataset/file_json.cc +++ b/cpp/src/arrow/dataset/file_json.cc @@ -324,8 +324,8 @@ Result MakeBatchGenerator( const std::shared_ptr& file) { ARROW_ASSIGN_OR_RAISE(auto future, DoOpenReader(file->source(), format, scan_options)); auto maybe_reader = future.result(); - // Defer errors that occured during reader instantiation since they're likely related to - // batch-processing. + // Defer errors that occurred during reader instantiation since they're likely related + // to batch-processing. if (!maybe_reader.ok()) { return MakeFailingGenerator>(maybe_reader.status()); } diff --git a/cpp/src/arrow/dataset/file_json_test.cc b/cpp/src/arrow/dataset/file_json_test.cc index 3b0647d28f734..9626e8a5509df 100644 --- a/cpp/src/arrow/dataset/file_json_test.cc +++ b/cpp/src/arrow/dataset/file_json_test.cc @@ -162,7 +162,7 @@ std::shared_ptr ToFileSource(std::string json) { return std::make_shared(Buffer::FromString(std::move(json))); } -// Mixin for additional JSON-specific tests, compatibile with both format APIs. +// Mixin for additional JSON-specific tests, compatible with both format APIs. 
template class JsonScanMixin { public: diff --git a/cpp/src/arrow/dataset/file_parquet.h b/cpp/src/arrow/dataset/file_parquet.h index 5132a805bb4d6..f527ce5d70ae0 100644 --- a/cpp/src/arrow/dataset/file_parquet.h +++ b/cpp/src/arrow/dataset/file_parquet.h @@ -335,7 +335,7 @@ class ARROW_DS_EXPORT ParquetDatasetFactory : public DatasetFactory { /// \brief Create a ParquetDatasetFactory from a metadata source. /// /// Similar to the previous Make definition, but the metadata can be a Buffer - /// and the base_path is explicited instead of inferred from the metadata + /// and the base_path is explicit instead of inferred from the metadata /// path. /// /// \param[in] metadata source to open the metadata parquet file from diff --git a/cpp/src/arrow/dataset/scan_node.cc b/cpp/src/arrow/dataset/scan_node.cc index 5ed6eee5ddf83..c25c5b70ae1ec 100644 --- a/cpp/src/arrow/dataset/scan_node.cc +++ b/cpp/src/arrow/dataset/scan_node.cc @@ -94,7 +94,7 @@ Future>> GetFragments( /// fragment on disk actually had a column x, and the value was not 7, then we will prefer /// the guarantee in this invalid case. /// -/// Ths next step is to fetch the metadata for the fragment. For some formats (e.g. +/// The next step is to fetch the metadata for the fragment. For some formats (e.g. /// CSV) this may be quite simple (get the size of the file). For other formats (e.g. /// parquet) this is more involved and requires reading data. There is one metadata /// io-task per fragment. The metadata io-task creates an AsyncGenerator @@ -150,7 +150,7 @@ class ScanNode : public acero::ExecNode, public acero::TracedNode { } if (normalized.filter.call() && normalized.filter.IsBound()) { - // There is no easy way to make sure a filter was bound agaisnt the same + // There is no easy way to make sure a filter was bound against the same // function registry as the one in ctx so we just require it to be unbound // FIXME - Do we care if it was bound to a different function registry? return Status::Invalid("Scan filter must be unbound"); diff --git a/cpp/src/arrow/dataset/scanner.h b/cpp/src/arrow/dataset/scanner.h index 5479a0d9db404..4479158ff20cc 100644 --- a/cpp/src/arrow/dataset/scanner.h +++ b/cpp/src/arrow/dataset/scanner.h @@ -141,7 +141,7 @@ struct ARROW_DS_EXPORT ScanOptions { /// Scan-specific options, which can be changed between scans of the same dataset. /// /// A dataset consists of one or more individual fragments. A fragment is anything -/// that is indepedently scannable, often a file. +/// that is independently scannable, often a file. /// /// Batches from all fragments will be converted to a single schema. This unified /// schema is referred to as the "dataset schema" and is the output schema for @@ -230,7 +230,7 @@ struct ARROW_DS_EXPORT ScanV2Options : public acero::ExecNodeOptions { /// for example, if scanning a parquet file that has batches with 100MiB of data /// then the actual readahead will be at least 100MiB /// - /// Set to 0 to disable readhead. When disabled, the scanner will read the + /// Set to 0 to disable readahead. When disabled, the scanner will read the /// dataset one batch at a time /// /// This limit applies across all fragments. 
If the limit is 32MiB and the diff --git a/cpp/src/arrow/dataset/scanner_test.cc b/cpp/src/arrow/dataset/scanner_test.cc index cde3a725c4663..fccfc80032d31 100644 --- a/cpp/src/arrow/dataset/scanner_test.cc +++ b/cpp/src/arrow/dataset/scanner_test.cc @@ -2591,7 +2591,7 @@ TEST(ScanNode, MinimalEndToEnd) { // for now, specify the projection as the full project expression (eventually this can // just be a list of materialized field names) compute::Expression a_times_2 = call("multiply", {field_ref("a"), literal(2)}); - // set the projection such that required project experssion field is included as a + // set the projection such that required project expression field is included as a // field_ref compute::Expression project_expr = field_ref("a"); options->projection = @@ -2686,7 +2686,7 @@ TEST(ScanNode, MinimalScalarAggEndToEnd) { // for now, specify the projection as the full project expression (eventually this can // just be a list of materialized field names) compute::Expression a_times_2 = call("multiply", {field_ref("a"), literal(2)}); - // set the projection such that required project experssion field is included as a + // set the projection such that required project expression field is included as a // field_ref compute::Expression project_expr = field_ref("a"); options->projection = @@ -2778,7 +2778,7 @@ TEST(ScanNode, MinimalGroupedAggEndToEnd) { // for now, specify the projection as the full project expression (eventually this can // just be a list of materialized field names) compute::Expression a_times_2 = call("multiply", {field_ref("a"), literal(2)}); - // set the projection such that required project experssion field is included as a + // set the projection such that required project expression field is included as a // field_ref compute::Expression a = field_ref("a"); compute::Expression b = field_ref("b"); @@ -2888,12 +2888,12 @@ TEST(ScanNode, OnlyLoadProjectedFields) { {acero::Declaration({"scan", dataset::ScanNodeOptions{dataset, scan_options}})}); ASSERT_OK_AND_ASSIGN(auto actual, acero::DeclarationToTable(declarations)); // Scan node always emits augmented fields so we drop those - ASSERT_OK_AND_ASSIGN(auto actualMinusAgumented, actual->SelectColumns({0, 1, 2})); + ASSERT_OK_AND_ASSIGN(auto actualMinusAugmented, actual->SelectColumns({0, 1, 2})); auto expected = TableFromJSON(dummy_schema, {R"([ [null, 1, null], [null, 4, null] ])"}); - AssertTablesEqual(*expected, *actualMinusAgumented, /*same_chunk_layout=*/false); + AssertTablesEqual(*expected, *actualMinusAugmented, /*same_chunk_layout=*/false); } } // namespace dataset diff --git a/cpp/src/arrow/dataset/subtree_test.cc b/cpp/src/arrow/dataset/subtree_test.cc index 75429a5fb7f95..fc13c20ecee49 100644 --- a/cpp/src/arrow/dataset/subtree_test.cc +++ b/cpp/src/arrow/dataset/subtree_test.cc @@ -133,7 +133,7 @@ void ExpectForestIs(std::vector infos, std::vector expected_roots) ASSERT_OK(forest.Visit( [&](Forest::Ref ref) -> Result { actual_roots.emplace_back(ref, infos); - return false; // only vist roots + return false; // only visit roots }, [](Forest::Ref) {})); diff --git a/cpp/src/arrow/dataset/test_util_internal.h b/cpp/src/arrow/dataset/test_util_internal.h index 51d39d532c82c..de0519afac9e1 100644 --- a/cpp/src/arrow/dataset/test_util_internal.h +++ b/cpp/src/arrow/dataset/test_util_internal.h @@ -1257,7 +1257,7 @@ class FileFormatScanNodeMixin : public FileFormatFixtureMixinV2, int64_t expected_batches() const { return GetParam().num_batches; } int64_t expected_rows() const { return GetParam().expected_rows(); 
} - // Override FileFormatFixtureMixin::GetRandomData to paramterize the # + // Override FileFormatFixtureMixin::GetRandomData to parameterize the # // of batches and rows per batch std::shared_ptr GetRandomData( std::shared_ptr schema) override { diff --git a/cpp/src/arrow/engine/substrait/extended_expression_internal.cc b/cpp/src/arrow/engine/substrait/extended_expression_internal.cc index a6401e1d0b36d..225901c910f25 100644 --- a/cpp/src/arrow/engine/substrait/extended_expression_internal.cc +++ b/cpp/src/arrow/engine/substrait/extended_expression_internal.cc @@ -85,7 +85,7 @@ Result ExpressionFromProto( // expression which is not redundant. // // For example, if the base schema is [struct, i32] and the expression is - // field(0) the the extended expression output names might be ["foo", "my_expression"]. + // field(0) the extended expression output names might be ["foo", "my_expression"]. // The "foo" is redundant but we can verify it matches and reject if it does not. // // The one exception is struct literals which have no field names. For example, if diff --git a/cpp/src/arrow/engine/substrait/extension_set.h b/cpp/src/arrow/engine/substrait/extension_set.h index d9c0af081a546..0a502960447e6 100644 --- a/cpp/src/arrow/engine/substrait/extension_set.h +++ b/cpp/src/arrow/engine/substrait/extension_set.h @@ -86,7 +86,7 @@ struct ARROW_ENGINE_EXPORT IdHashEq { /// \brief Owning storage for ids /// /// Substrait plans may reuse URIs and names in many places. For convenience -/// and performance Substarit ids are typically passed around as views. As we +/// and performance Substrait ids are typically passed around as views. As we /// convert a plan from Substrait to Arrow we need to copy these strings out of /// the Substrait buffer and into owned storage. This class serves as that owned /// storage. 
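As a hedged aside on the owned-storage idea the extension_set.h comment above describes: the sketch below is illustrative only (OwnedIdStorage and its Emplace method are invented names, not Arrow's actual IdStorage API). It shows how views pointing into a transient Substrait buffer can be copied into storage whose lifetime outlives the buffer.

#include <deque>
#include <string>
#include <string_view>

// Toy owned storage for ids: copy each view into memory owned by this
// object and hand back a view tied to the storage's lifetime.
class OwnedIdStorage {
 public:
  std::string_view Emplace(std::string_view id) {
    // std::deque never relocates existing elements when it grows, so
    // views returned by earlier calls stay valid after later inserts.
    return owned_.emplace_back(id);
  }

 private:
  std::deque<std::string> owned_;
};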
diff --git a/cpp/src/arrow/engine/substrait/options.cc b/cpp/src/arrow/engine/substrait/options.cc index 481375076734f..f8e7173386583 100644 --- a/cpp/src/arrow/engine/substrait/options.cc +++ b/cpp/src/arrow/engine/substrait/options.cc @@ -81,7 +81,7 @@ class DefaultExtensionProvider : public BaseExtensionProvider { rel.UnpackTo(&seg_agg_rel); return MakeSegmentedAggregateRel(conv_opts, inputs, seg_agg_rel, ext_set); } - return Status::NotImplemented("Unrecognized extension in Susbstrait plan: ", + return Status::NotImplemented("Unrecognized extension in Substrait plan: ", rel.DebugString()); } diff --git a/cpp/src/arrow/engine/substrait/serde_test.cc b/cpp/src/arrow/engine/substrait/serde_test.cc index 2e72ae70edd88..1e771ccdd25c2 100644 --- a/cpp/src/arrow/engine/substrait/serde_test.cc +++ b/cpp/src/arrow/engine/substrait/serde_test.cc @@ -1334,7 +1334,7 @@ TEST(Substrait, GetRecordBatchReader) { ASSERT_OK_AND_ASSIGN(auto reader, ExecuteSerializedPlan(*buf)); ASSERT_OK_AND_ASSIGN(auto table, Table::FromRecordBatchReader(reader.get())); // Note: assuming the binary.parquet file contains fixed amount of records - // in case of a test failure, re-evalaute the content in the file + // in case of a test failure, re-evaluate the content in the file EXPECT_EQ(table->num_rows(), 12); }); } @@ -4223,7 +4223,7 @@ TEST(Substrait, ReadRelWithGlobFiles) { } }] })")); - // To avoid unnecessar metadata columns being included in the final result + // To avoid unnecessary metadata columns being included in the final result std::vector include_columns = {0, 1, 2}; compute::SortOptions options({compute::SortKey("A", compute::SortOrder::Ascending)}); CheckRoundTripResult(std::move(expected_table), buf, std::move(include_columns), @@ -6108,7 +6108,7 @@ TEST(Substrait, ExtendedExpressionSerialization) { TEST(Substrait, ExtendedExpressionInvalidPlans) { // The schema defines the type as {"x", "y"} but output_names has {"a", "y"} - constexpr std::string_view kBadOuptutNames = R"( + constexpr std::string_view kBadOutputNames = R"( { "referredExpr":[ { @@ -6159,7 +6159,7 @@ TEST(Substrait, ExtendedExpressionInvalidPlans) { )"; ASSERT_OK_AND_ASSIGN( - auto buf, internal::SubstraitFromJSON("ExtendedExpression", kBadOuptutNames)); + auto buf, internal::SubstraitFromJSON("ExtendedExpression", kBadOutputNames)); ASSERT_THAT(DeserializeExpressions(*buf), Raises(StatusCode::Invalid, testing::HasSubstr("Ambiguous plan"))); diff --git a/cpp/src/arrow/engine/substrait/visibility.h b/cpp/src/arrow/engine/substrait/visibility.h index cfd0db2747bba..d81d202ee6567 100644 --- a/cpp/src/arrow/engine/substrait/visibility.h +++ b/cpp/src/arrow/engine/substrait/visibility.h @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -// TODO(westonpace): Once we have a propert engine module this file +// TODO(westonpace): Once we have a proper engine module this file // should be renamed arrow/engine/visibility.h // This API is EXPERIMENTAL. 
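Since visibility.h is one of several near-identical visibility headers in the tree, a generic sketch of the export-macro pattern such headers implement may help; the MYLIB_* names below are placeholders, not Arrow's exact macro logic.

// Simplified export-macro pattern (assumed names, not Arrow's real ones).
#if defined(_WIN32)
#  if defined(MYLIB_EXPORTING)
#    define MYLIB_EXPORT __declspec(dllexport)  // building the shared library
#  else
#    define MYLIB_EXPORT __declspec(dllimport)  // consuming the shared library
#  endif
#else
#  define MYLIB_EXPORT __attribute__((visibility("default")))
#endif

// Public symbols are annotated so they survive -fvisibility=hidden builds:
class MYLIB_EXPORT Engine {};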
diff --git a/cpp/src/arrow/extension/fixed_shape_tensor_test.cc b/cpp/src/arrow/extension/fixed_shape_tensor_test.cc index b8be1edc49e60..2b8e703d3c66e 100644 --- a/cpp/src/arrow/extension/fixed_shape_tensor_test.cc +++ b/cpp/src/arrow/extension/fixed_shape_tensor_test.cc @@ -194,7 +194,7 @@ TEST_F(TestExtensionType, MetadataSerializationRoundtrip) { "Invalid dim_names"); } -TEST_F(TestExtensionType, RoudtripBatch) { +TEST_F(TestExtensionType, RoundtripBatch) { auto exact_ext_type = internal::checked_pointer_cast(ext_type_); std::vector> buffers = {nullptr, Buffer::Wrap(values_)}; @@ -383,7 +383,7 @@ TEST_F(TestExtensionType, SliceTensor) { ASSERT_EQ(sliced->length(), partial->length()); } -TEST_F(TestExtensionType, RoudtripBatchFromTensor) { +TEST_F(TestExtensionType, RoundtripBatchFromTensor) { auto exact_ext_type = internal::checked_pointer_cast(ext_type_); ASSERT_OK_AND_ASSIGN(auto tensor, Tensor::Make(value_type_, Buffer::Wrap(values_), shape_, {}, {"n", "x", "y"})); diff --git a/cpp/src/arrow/field_ref_test.cc b/cpp/src/arrow/field_ref_test.cc index 10e2564ed1896..0cb2da4f709a1 100644 --- a/cpp/src/arrow/field_ref_test.cc +++ b/cpp/src/arrow/field_ref_test.cc @@ -135,7 +135,7 @@ struct FieldPathTestCase { out.schema = arrow::schema({out.v0.field, out.v1.field}); out.type = struct_(out.schema->fields()); - // Create null bitmaps for the struct fields independent of its childrens' + // Create null bitmaps for the struct fields independent of its children's // bitmaps. For FieldPath::GetFlattened, parent/child bitmaps should be combined // - for FieldPath::Get, higher-level nulls are ignored. auto bitmap1_1 = gen.NullBitmap(kNumRows, 0.15); diff --git a/cpp/src/arrow/filesystem/azurefs.cc b/cpp/src/arrow/filesystem/azurefs.cc index 4dde275da135f..8030c06d68f7b 100644 --- a/cpp/src/arrow/filesystem/azurefs.cc +++ b/cpp/src/arrow/filesystem/azurefs.cc @@ -335,7 +335,7 @@ class ObjectInputFile final : public io::RandomAccessFile { } return internal::ExceptionToStatus( "GetProperties failed for '" + blob_client_->GetUrl() + - "' with an unexpected Azure error. Can not initialise an ObjectInputFile " + "' with an unexpected Azure error. Cannot initialise an ObjectInputFile " "without knowing the file size.", exception); } @@ -561,7 +561,7 @@ class ObjectAppendStream final : public io::OutputStream { } else { return internal::ExceptionToStatus( "GetProperties failed for '" + block_blob_client_->GetUrl() + - "' with an unexpected Azure error. Can not initialise an " + "' with an unexpected Azure error. 
Cannot initialise an " "ObjectAppendStream without knowing whether a file already exists at " "this path, and if it exists, its size.", exception); diff --git a/cpp/src/arrow/filesystem/azurefs_test.cc b/cpp/src/arrow/filesystem/azurefs_test.cc index 7c86385126d40..b9971bf5bbe73 100644 --- a/cpp/src/arrow/filesystem/azurefs_test.cc +++ b/cpp/src/arrow/filesystem/azurefs_test.cc @@ -381,7 +381,7 @@ TEST_F(AzuriteFileSystemTest, DetectHierarchicalNamespace) { TEST_F(AzuriteFileSystemTest, DetectHierarchicalNamespaceFailsWithMissingContainer) { auto hierarchical_namespace = internal::HierarchicalNamespaceDetector(); ASSERT_OK(hierarchical_namespace.Init(datalake_service_client_.get())); - ASSERT_NOT_OK(hierarchical_namespace.Enabled("non-existent-container")); + ASSERT_NOT_OK(hierarchical_namespace.Enabled("nonexistent-container")); } TEST_F(AzuriteFileSystemTest, GetFileInfoAccount) { @@ -394,7 +394,7 @@ TEST_F(AzuriteFileSystemTest, GetFileInfoAccount) { TEST_F(AzuriteFileSystemTest, GetFileInfoContainer) { AssertFileInfo(fs_.get(), PreexistingContainerName(), FileType::Directory); - AssertFileInfo(fs_.get(), "non-existent-container", FileType::NotFound); + AssertFileInfo(fs_.get(), "nonexistent-container", FileType::NotFound); // URI ASSERT_RAISES(Invalid, fs_->GetFileInfo("abfs://" + PreexistingContainerName())); diff --git a/cpp/src/arrow/filesystem/localfs.cc b/cpp/src/arrow/filesystem/localfs.cc index e030014159cf4..d440629a02496 100644 --- a/cpp/src/arrow/filesystem/localfs.cc +++ b/cpp/src/arrow/filesystem/localfs.cc @@ -304,7 +304,7 @@ namespace { /// Workhorse for streaming async implementation of `GetFileInfo` /// (`GetFileInfoGenerator`). /// -/// There are two variants of async discovery functions suported: +/// There are two variants of async discovery functions supported: /// 1. `DiscoverDirectoryFiles`, which parallelizes traversal of individual directories /// so that each directory results are yielded as a separate `FileInfoGenerator` via /// an underlying `DiscoveryImplIterator`, which delivers items in chunks (default size diff --git a/cpp/src/arrow/filesystem/s3fs.cc b/cpp/src/arrow/filesystem/s3fs.cc index 26a1530660781..2cc907a63ce20 100644 --- a/cpp/src/arrow/filesystem/s3fs.cc +++ b/cpp/src/arrow/filesystem/s3fs.cc @@ -1042,7 +1042,7 @@ class RegionResolver { lock.unlock(); ARROW_ASSIGN_OR_RAISE(auto region, ResolveRegionUncached(bucket)); lock.lock(); - // Note we don't cache a non-existent bucket, as the bucket could be created later + // Note we don't cache a nonexistent bucket, as the bucket could be created later cache_[bucket] = region; return region; } @@ -1546,7 +1546,7 @@ class ObjectOutputStream final : public io::OutputStream { nbytes -= offset; }; - // Handle case where we have some bytes bufferred from prior calls. + // Handle case where we have some bytes buffered from prior calls. 
if (current_part_size_ > 0) { // Try to fill current buffer const int64_t to_copy = std::min(nbytes, kPartUploadSize - current_part_size_); @@ -3007,7 +3007,7 @@ S3GlobalOptions S3GlobalOptions::Defaults() { auto result = arrow::internal::GetEnvVar("ARROW_S3_LOG_LEVEL"); if (result.ok()) { - // Extract, trim, and downcase the value of the enivronment variable + // Extract, trim, and downcase the value of the environment variable auto value = arrow::internal::AsciiToLower(arrow::internal::TrimString(result.ValueUnsafe())); diff --git a/cpp/src/arrow/filesystem/s3fs.h b/cpp/src/arrow/filesystem/s3fs.h index 9900a9a1c0aa5..90333e05e7334 100644 --- a/cpp/src/arrow/filesystem/s3fs.h +++ b/cpp/src/arrow/filesystem/s3fs.h @@ -130,7 +130,7 @@ struct ARROW_EXPORT S3Options { std::string role_arn; /// Optional identifier for an assumed role session. std::string session_name; - /// Optional external idenitifer to pass to STS when assuming a role + /// Optional external identifier to pass to STS when assuming a role std::string external_id; /// Frequency (in seconds) to refresh temporary credentials from assumed role int load_frequency = 900; @@ -185,7 +185,7 @@ struct ARROW_EXPORT S3Options { const std::string& external_id = "", int load_frequency = 900, const std::shared_ptr& stsClient = NULLPTR); - /// Configure with credentials from role assumed using a web identitiy token + /// Configure with credentials from role assumed using a web identity token void ConfigureAssumeRoleWithWebIdentityCredentials(); std::string GetAccessKey() const; diff --git a/cpp/src/arrow/filesystem/s3fs_test.cc b/cpp/src/arrow/filesystem/s3fs_test.cc index b789845bd1aac..487a6abb18903 100644 --- a/cpp/src/arrow/filesystem/s3fs_test.cc +++ b/cpp/src/arrow/filesystem/s3fs_test.cc @@ -365,10 +365,10 @@ TEST_F(S3RegionResolutionTest, RestrictedBucket) { } TEST_F(S3RegionResolutionTest, NonExistentBucket) { - auto maybe_region = ResolveS3BucketRegion("ursa-labs-non-existent-bucket"); + auto maybe_region = ResolveS3BucketRegion("ursa-labs-nonexistent-bucket"); ASSERT_RAISES(IOError, maybe_region); ASSERT_THAT(maybe_region.status().message(), - ::testing::HasSubstr("Bucket 'ursa-labs-non-existent-bucket' not found")); + ::testing::HasSubstr("Bucket 'ursa-labs-nonexistent-bucket' not found")); } TEST_F(S3RegionResolutionTest, InvalidBucketName) { @@ -645,13 +645,13 @@ TEST_F(TestS3FS, GetFileInfoObject) { // Nonexistent AssertFileInfo(fs_.get(), "bucket/emptyd", FileType::NotFound); AssertFileInfo(fs_.get(), "bucket/somed", FileType::NotFound); - AssertFileInfo(fs_.get(), "non-existent-bucket/somed", FileType::NotFound); + AssertFileInfo(fs_.get(), "nonexistent-bucket/somed", FileType::NotFound); // Trailing slashes AssertFileInfo(fs_.get(), "bucket/emptydir/", FileType::Directory, kNoSize); AssertFileInfo(fs_.get(), "bucket/somefile/", FileType::File, 9); AssertFileInfo(fs_.get(), "bucket/emptyd/", FileType::NotFound); - AssertFileInfo(fs_.get(), "non-existent-bucket/somed/", FileType::NotFound); + AssertFileInfo(fs_.get(), "nonexistent-bucket/somed/", FileType::NotFound); // URIs ASSERT_RAISES(Invalid, fs_->GetFileInfo("s3:bucket/emptydir")); @@ -1057,7 +1057,7 @@ TEST_F(TestS3FS, Move) { ASSERT_OK(fs_->Move("bucket/a=2/newfile", "bucket/a=3/newfile")); // Nonexistent - ASSERT_RAISES(IOError, fs_->Move("bucket/non-existent", "bucket/newfile2")); + ASSERT_RAISES(IOError, fs_->Move("bucket/nonexistent", "bucket/newfile2")); ASSERT_RAISES(IOError, fs_->Move("nonexistent-bucket/somefile", "bucket/newfile2")); ASSERT_RAISES(IOError, 
fs_->Move("bucket/somefile", "nonexistent-bucket/newfile2")); AssertFileInfo(fs_.get(), "bucket/newfile2", FileType::NotFound); diff --git a/cpp/src/arrow/filesystem/test_util.h b/cpp/src/arrow/filesystem/test_util.h index 8156721b8537c..aa56c3123438d 100644 --- a/cpp/src/arrow/filesystem/test_util.h +++ b/cpp/src/arrow/filesystem/test_util.h @@ -170,7 +170,7 @@ class ARROW_TESTING_EXPORT GenericFileSystemTest { virtual bool allow_move_dir_over_non_empty_dir() const { return false; } // - Whether the filesystem allows appending to a file virtual bool allow_append_to_file() const { return true; } - // - Whether the filesystem allows appending to a new (not existent yet) file + // - Whether the filesystem allows appending to a new (not yet extant) file virtual bool allow_append_to_new_file() const { return true; } // - Whether the filesystem supports directory modification times virtual bool have_directory_mtimes() const { return true; } diff --git a/cpp/src/arrow/flight/ArrowFlightTestingConfig.cmake.in b/cpp/src/arrow/flight/ArrowFlightTestingConfig.cmake.in index f072b2603e375..3c043b05a6bd5 100644 --- a/cpp/src/arrow/flight/ArrowFlightTestingConfig.cmake.in +++ b/cpp/src/arrow/flight/ArrowFlightTestingConfig.cmake.in @@ -32,7 +32,7 @@ find_dependency(ArrowTesting) include("${CMAKE_CURRENT_LIST_DIR}/ArrowFlightTestingTargets.cmake") -arrow_keep_backward_compatibility(ArrowFlightTetsing arrow_flight_testing) +arrow_keep_backward_compatibility(ArrowFlightTesting arrow_flight_testing) check_required_components(ArrowFlightTesting) diff --git a/cpp/src/arrow/flight/client.h b/cpp/src/arrow/flight/client.h index e26a821359781..1df71d2029f74 100644 --- a/cpp/src/arrow/flight/client.h +++ b/cpp/src/arrow/flight/client.h @@ -299,7 +299,7 @@ class ARROW_FLIGHT_EXPORT FlightClient { /// \brief Request and poll a long running query /// \param[in] options Per-RPC options /// \param[in] descriptor the dataset request or a descriptor returned by a - /// prioir PollFlightInfo call + /// prior PollFlightInfo call /// \return Arrow result with the PollInfo describing the status of /// the requested query arrow::Result> PollFlightInfo( diff --git a/cpp/src/arrow/flight/cookie_internal.h b/cpp/src/arrow/flight/cookie_internal.h index 84647a1c94ca3..62c0390c585b3 100644 --- a/cpp/src/arrow/flight/cookie_internal.h +++ b/cpp/src/arrow/flight/cookie_internal.h @@ -35,14 +35,14 @@ namespace flight { namespace internal { /// \brief Case insensitive comparator for use by cookie caching map. Cookies are not -/// case sensitive. +/// case-sensitive. class ARROW_FLIGHT_EXPORT CaseInsensitiveComparator { public: bool operator()(const std::string& t1, const std::string& t2) const; }; /// \brief Case insensitive hasher for use by cookie caching map. Cookies are not -/// case sensitive. +/// case-sensitive. 
class ARROW_FLIGHT_EXPORT CaseInsensitiveHash { public: size_t operator()(const std::string& key) const; diff --git a/cpp/src/arrow/flight/flight_internals_test.cc b/cpp/src/arrow/flight/flight_internals_test.cc index 5feb310fc14a2..522973bec7231 100644 --- a/cpp/src/arrow/flight/flight_internals_test.cc +++ b/cpp/src/arrow/flight/flight_internals_test.cc @@ -562,7 +562,7 @@ class TestCookieParsing : public ::testing::Test { EXPECT_EQ(cookie_as_string, cookie.AsCookieString()); } - void VerifyCookieDateConverson(std::string date, const std::string& converted_date) { + void VerifyCookieDateConversion(std::string date, const std::string& converted_date) { internal::Cookie::ConvertCookieDate(&date); EXPECT_EQ(converted_date, date); } @@ -646,21 +646,21 @@ TEST_F(TestCookieParsing, ToString) { } TEST_F(TestCookieParsing, DateConversion) { - VerifyCookieDateConverson("Mon, 01 jan 2038 22:15:36 GMT;", "01 01 2038 22:15:36"); - VerifyCookieDateConverson("TUE, 10 Feb 2038 22:15:36 GMT", "10 02 2038 22:15:36"); - VerifyCookieDateConverson("WED, 20 MAr 2038 22:15:36 GMT;", "20 03 2038 22:15:36"); - VerifyCookieDateConverson("thu, 15 APR 2038 22:15:36 GMT", "15 04 2038 22:15:36"); - VerifyCookieDateConverson("Fri, 30 mAY 2038 22:15:36 GMT;", "30 05 2038 22:15:36"); - VerifyCookieDateConverson("Sat, 03 juN 2038 22:15:36 GMT", "03 06 2038 22:15:36"); - VerifyCookieDateConverson("Sun, 01 JuL 2038 22:15:36 GMT;", "01 07 2038 22:15:36"); - VerifyCookieDateConverson("Fri, 06 aUg 2038 22:15:36 GMT", "06 08 2038 22:15:36"); - VerifyCookieDateConverson("Fri, 01 SEP 2038 22:15:36 GMT;", "01 09 2038 22:15:36"); - VerifyCookieDateConverson("Fri, 01 OCT 2038 22:15:36 GMT", "01 10 2038 22:15:36"); - VerifyCookieDateConverson("Fri, 01 Nov 2038 22:15:36 GMT;", "01 11 2038 22:15:36"); - VerifyCookieDateConverson("Fri, 01 deC 2038 22:15:36 GMT", "01 12 2038 22:15:36"); - VerifyCookieDateConverson("", ""); - VerifyCookieDateConverson("Fri, 01 INVALID 2038 22:15:36 GMT;", - "01 INVALID 2038 22:15:36"); + VerifyCookieDateConversion("Mon, 01 jan 2038 22:15:36 GMT;", "01 01 2038 22:15:36"); + VerifyCookieDateConversion("TUE, 10 Feb 2038 22:15:36 GMT", "10 02 2038 22:15:36"); + VerifyCookieDateConversion("WED, 20 MAr 2038 22:15:36 GMT;", "20 03 2038 22:15:36"); + VerifyCookieDateConversion("thu, 15 APR 2038 22:15:36 GMT", "15 04 2038 22:15:36"); + VerifyCookieDateConversion("Fri, 30 mAY 2038 22:15:36 GMT;", "30 05 2038 22:15:36"); + VerifyCookieDateConversion("Sat, 03 juN 2038 22:15:36 GMT", "03 06 2038 22:15:36"); + VerifyCookieDateConversion("Sun, 01 JuL 2038 22:15:36 GMT;", "01 07 2038 22:15:36"); + VerifyCookieDateConversion("Fri, 06 aUg 2038 22:15:36 GMT", "06 08 2038 22:15:36"); + VerifyCookieDateConversion("Fri, 01 SEP 2038 22:15:36 GMT;", "01 09 2038 22:15:36"); + VerifyCookieDateConversion("Fri, 01 OCT 2038 22:15:36 GMT", "01 10 2038 22:15:36"); + VerifyCookieDateConversion("Fri, 01 Nov 2038 22:15:36 GMT;", "01 11 2038 22:15:36"); + VerifyCookieDateConversion("Fri, 01 deC 2038 22:15:36 GMT", "01 12 2038 22:15:36"); + VerifyCookieDateConversion("", ""); + VerifyCookieDateConversion("Fri, 01 INVALID 2038 22:15:36 GMT;", + "01 INVALID 2038 22:15:36"); } TEST_F(TestCookieParsing, ParseCookieAttribute) { diff --git a/cpp/src/arrow/flight/flight_test.cc b/cpp/src/arrow/flight/flight_test.cc index 020fb7b24efc3..55cc938870f85 100644 --- a/cpp/src/arrow/flight/flight_test.cc +++ b/cpp/src/arrow/flight/flight_test.cc @@ -453,7 +453,7 @@ class TestTls : public ::testing::Test { // get initialized. 
// https://github.com/grpc/grpc/issues/13856 // https://github.com/grpc/grpc/issues/20311 - // In general, gRPC on MacOS struggles with TLS (both in the sense + // In general, gRPC on macOS struggles with TLS (both in the sense // of thread-locals and encryption) grpc_init(); diff --git a/cpp/src/arrow/flight/integration_tests/test_integration_server.cc b/cpp/src/arrow/flight/integration_tests/test_integration_server.cc index 6f31b82d1a9f4..b301955db8f58 100644 --- a/cpp/src/arrow/flight/integration_tests/test_integration_server.cc +++ b/cpp/src/arrow/flight/integration_tests/test_integration_server.cc @@ -40,7 +40,7 @@ #include "arrow/flight/test_util.h" DEFINE_int32(port, 31337, "Server port to listen on"); -DEFINE_string(scenario, "", "Integration test senario to run"); +DEFINE_string(scenario, "", "Integration test scenario to run"); namespace arrow { namespace flight { diff --git a/cpp/src/arrow/flight/server.h b/cpp/src/arrow/flight/server.h index 6eba90c53a754..ffcffe12e3c78 100644 --- a/cpp/src/arrow/flight/server.h +++ b/cpp/src/arrow/flight/server.h @@ -226,7 +226,7 @@ class ARROW_FLIGHT_EXPORT FlightServerBase { /// \brief Shut down the server, blocking until current requests finish. /// /// Can be called from a signal handler or another thread while Serve() - /// blocks. Optionally a deadline can be set. Once the the deadline expires + /// blocks. Optionally a deadline can be set. Once the deadline expires /// server will wait until remaining running calls complete. /// /// Should only be called once. @@ -262,7 +262,7 @@ class ARROW_FLIGHT_EXPORT FlightServerBase { /// \brief Retrieve the current status of the target query /// \param[in] context The call context. /// \param[in] request the dataset request or a descriptor returned by a - /// prioir PollFlightInfo call + /// prior PollFlightInfo call /// \param[out] info the returned retry info provider /// \return Status virtual Status PollFlightInfo(const ServerCallContext& context, diff --git a/cpp/src/arrow/flight/sql/example/sqlite_server.cc b/cpp/src/arrow/flight/sql/example/sqlite_server.cc index 5e1043713295f..20b234e90ad3b 100644 --- a/cpp/src/arrow/flight/sql/example/sqlite_server.cc +++ b/cpp/src/arrow/flight/sql/example/sqlite_server.cc @@ -598,7 +598,7 @@ class SQLiteFlightSqlServer::Impl { const ServerCallContext& context, const GetPrimaryKeys& command) { std::stringstream table_query; - // The field key_name can not be recovered by the sqlite, so it is being set + // The field key_name cannot be recovered by the sqlite, so it is being set // to null following the same pattern for catalog_name and schema_name. table_query << "SELECT null as catalog_name, null as schema_name, table_name, " "name as column_name, pk as key_sequence, null as key_name\n" diff --git a/cpp/src/arrow/flight/sql/example/sqlite_statement.cc b/cpp/src/arrow/flight/sql/example/sqlite_statement.cc index 2363925660028..0305a1fa6b475 100644 --- a/cpp/src/arrow/flight/sql/example/sqlite_statement.cc +++ b/cpp/src/arrow/flight/sql/example/sqlite_statement.cc @@ -130,7 +130,7 @@ arrow::Result> SqliteStatement::GetSchema() const { if (column_decltype != NULLPTR) { ARROW_ASSIGN_OR_RAISE(data_type, GetArrowType(column_decltype)); } else { - // If it can not determine the actual column type, return a dense_union type + // If it cannot determine the actual column type, return a dense_union type // covering any type SQLite supports. 
data_type = GetUnknownColumnDataType(); } diff --git a/cpp/src/arrow/flight/sql/example/sqlite_type_info.h b/cpp/src/arrow/flight/sql/example/sqlite_type_info.h index a104626c0f4eb..f26ddc31e7f37 100644 --- a/cpp/src/arrow/flight/sql/example/sqlite_type_info.h +++ b/cpp/src/arrow/flight/sql/example/sqlite_type_info.h @@ -24,11 +24,11 @@ namespace flight { namespace sql { namespace example { -/// \brief Gets the harded-coded type info from Sqlite for all data types. +/// \brief Gets the hard-coded type info from Sqlite for all data types. /// \return A record batch. arrow::Result> DoGetTypeInfoResult(); -/// \brief Gets the harded-coded type info from Sqlite filtering +/// \brief Gets the hard-coded type info from Sqlite filtering /// for a specific data type. /// \return A record batch. arrow::Result> DoGetTypeInfoResult(int data_type_filter); diff --git a/cpp/src/arrow/flight/sql/server.h b/cpp/src/arrow/flight/sql/server.h index 360677c078c81..24f0aa2bd48cf 100644 --- a/cpp/src/arrow/flight/sql/server.h +++ b/cpp/src/arrow/flight/sql/server.h @@ -590,7 +590,7 @@ class ARROW_FLIGHT_SQL_EXPORT FlightSqlServerBase : public FlightServerBase { /// \brief Commit/rollback a transaction. /// \param[in] context The call context. - /// \param[in] request The tranaction. + /// \param[in] request The transaction. virtual Status EndTransaction(const ServerCallContext& context, const ActionEndTransactionRequest& request); diff --git a/cpp/src/arrow/flight/sql/types.h b/cpp/src/arrow/flight/sql/types.h index 293b1d5579ec0..b41488b68f232 100644 --- a/cpp/src/arrow/flight/sql/types.h +++ b/cpp/src/arrow/flight/sql/types.h @@ -535,7 +535,7 @@ struct ARROW_FLIGHT_SQL_EXPORT SqlInfoOptions { /// allowed for a column name. SQL_MAX_COLUMN_NAME_LENGTH = 543, - /// Retrieves a int64 value representing the the maximum number of columns + /// Retrieves a int64 value representing the maximum number of columns /// allowed in a GROUP BY clause. SQL_MAX_COLUMNS_IN_GROUP_BY = 544, @@ -846,7 +846,7 @@ struct ARROW_FLIGHT_SQL_EXPORT SqlInfoOptions { /// The level of support for Flight SQL transaction RPCs. enum SqlSupportedTransaction { - /// Unknown/not indicated/no supoprt + /// Unknown/not indicated/no support SQL_SUPPORTED_TRANSACTION_NONE = 0, /// Transactions, but not savepoints. 
SQL_SUPPORTED_TRANSACTION_TRANSACTION = 1, diff --git a/cpp/src/arrow/flight/types.h b/cpp/src/arrow/flight/types.h index 40a0787d14a7a..2342c758273a3 100644 --- a/cpp/src/arrow/flight/types.h +++ b/cpp/src/arrow/flight/types.h @@ -575,7 +575,7 @@ struct ARROW_FLIGHT_EXPORT SchemaResult { std::string raw_schema_; }; -/// \brief The access coordinates for retireval of a dataset, returned by +/// \brief The access coordinates for retrieval of a dataset, returned by /// GetFlightInfo class ARROW_FLIGHT_EXPORT FlightInfo { public: @@ -604,7 +604,7 @@ class ARROW_FLIGHT_EXPORT FlightInfo { /// bookkeeping /// \param[in,out] dictionary_memo for dictionary bookkeeping, will /// be modified - /// \return Arrrow result with the reconstructed Schema + /// \return Arrow result with the reconstructed Schema arrow::Result> GetSchema( ipc::DictionaryMemo* dictionary_memo) const; diff --git a/cpp/src/arrow/integration/json_integration.h b/cpp/src/arrow/integration/json_integration.h index 0284ef6c89d97..13abfae095ab6 100644 --- a/cpp/src/arrow/integration/json_integration.h +++ b/cpp/src/arrow/integration/json_integration.h @@ -40,7 +40,7 @@ class ARROW_EXPORT IntegrationJsonWriter { /// \brief Create a new JSON writer that writes to memory /// /// \param[in] schema the schema of record batches - /// \return the creater writer object + /// \return the created writer object static Result> Open( const std::shared_ptr& schema); diff --git a/cpp/src/arrow/io/concurrency.h b/cpp/src/arrow/io/concurrency.h index 43ceb8debcecb..85184c44ba2e4 100644 --- a/cpp/src/arrow/io/concurrency.h +++ b/cpp/src/arrow/io/concurrency.h @@ -83,7 +83,7 @@ class ARROW_EXPORT SharedExclusiveChecker { // concurrent calls to various methods. It is not necessary to wrap all // IO classes with these, only a few core classes that get used in tests. // -// We're not using virtual inheritance here as virtual bases have poorly +// We aren't using virtual inheritance here as virtual bases have poorly // understood semantic overhead which we'd be passing on to implementers // and users of these interfaces. Instead, we just duplicate the method // wrappers between those two classes. diff --git a/cpp/src/arrow/io/file_benchmark.cc b/cpp/src/arrow/io/file_benchmark.cc index 7fd10a0a0e659..b6e28efc1c983 100644 --- a/cpp/src/arrow/io/file_benchmark.cc +++ b/cpp/src/arrow/io/file_benchmark.cc @@ -141,7 +141,7 @@ class BackgroundReader { // Put fd in non-blocking mode fcntl(fd, F_SETFL, O_NONBLOCK); // Note the wakeup pipe itself does not need to be non-blocking, - // since we're not actually reading from it. + // since we aren't actually reading from it. } void LoopReading() { @@ -220,7 +220,7 @@ static void BenchmarkStreamingWrites(benchmark::State& state, // Benchmark writing to /dev/null // -// This situation is irrealistic as the kernel likely doesn't +// This situation is unrealistic as the kernel likely doesn't // copy the data at all, so we only measure small writes. static void FileOutputStreamSmallWritesToNull( diff --git a/cpp/src/arrow/io/interfaces.h b/cpp/src/arrow/io/interfaces.h index d2a11b7b6d7ce..b36c38c6d4868 100644 --- a/cpp/src/arrow/io/interfaces.h +++ b/cpp/src/arrow/io/interfaces.h @@ -196,7 +196,7 @@ class ARROW_EXPORT Readable { /// EXPERIMENTAL: The IOContext associated with this file. /// /// By default, this is the same as default_io_context(), but it may be - /// overriden by subclasses. + /// overridden by subclasses. 
virtual const IOContext& io_context() const; }; diff --git a/cpp/src/arrow/ipc/metadata_internal.cc b/cpp/src/arrow/ipc/metadata_internal.cc index 4f41edf8e15db..4154b594d9507 100644 --- a/cpp/src/arrow/ipc/metadata_internal.cc +++ b/cpp/src/arrow/ipc/metadata_internal.cc @@ -1423,7 +1423,7 @@ Status GetSchema(const void* opaque_schema, DictionaryMemo* dictionary_memo, std::shared_ptr metadata; RETURN_NOT_OK(internal::GetKeyValueMetadata(schema->custom_metadata(), &metadata)); - // set endianess using the value in flatbuf schema + // set endianness using the value in flatbuf schema auto endianness = schema->endianness() == flatbuf::Endianness::Little ? Endianness::Little : Endianness::Big; diff --git a/cpp/src/arrow/ipc/read_write_test.cc b/cpp/src/arrow/ipc/read_write_test.cc index 5c15cb912e4a7..17c4c5636d5b0 100644 --- a/cpp/src/arrow/ipc/read_write_test.cc +++ b/cpp/src/arrow/ipc/read_write_test.cc @@ -140,7 +140,7 @@ TEST_P(TestMessage, SerializeTo) { output_length); ASSERT_OK_AND_EQ(output_length, stream->Tell()); ASSERT_OK_AND_ASSIGN(auto buffer, stream->Finish()); - // chech whether length is written in little endian + // check whether length is written in little endian auto buffer_ptr = buffer.get()->data(); ASSERT_EQ(output_length - body_length - prefix_size, bit_util::FromLittleEndian(*(uint32_t*)(buffer_ptr + 4))); @@ -363,7 +363,7 @@ TEST_F(TestSchemaMetadata, MetadataVersionForwardCompatibility) { std::string root; ASSERT_OK(GetTestResourceRoot(&root)); - // schema_v6.arrow with currently non-existent MetadataVersion::V6 + // schema_v6.arrow with currently nonexistent MetadataVersion::V6 std::stringstream schema_v6_path; schema_v6_path << root << "/forward-compatibility/schema_v6.arrow"; @@ -520,7 +520,7 @@ class IpcTestFixture : public io::MemoryMapFixture, public ExtensionTypesMixin { }; TEST(MetadataVersion, ForwardsCompatCheck) { - // Verify UBSAN is ok with casting out of range metdata version. + // Verify UBSAN is ok with casting out of range metadata version. EXPECT_LT(flatbuf::MetadataVersion::MAX, static_cast(72)); } @@ -3019,14 +3019,14 @@ TEST(TestRecordBatchFileReaderIo, SkipTheFieldInTheMiddle) { GetReadRecordBatchReadRanges({0, 2}, {1, 40}); } -TEST(TestRecordBatchFileReaderIo, ReadTwoContinousFields) { +TEST(TestRecordBatchFileReaderIo, ReadTwoContinuousFields) { // read the int32 field and the int64 field // + 5 int32: 5 * 4 bytes // + 5 int64: 5 * 8 bytes GetReadRecordBatchReadRanges({1, 2}, {20, 40}); } -TEST(TestRecordBatchFileReaderIo, ReadTwoContinousFieldsWithIoMerged) { +TEST(TestRecordBatchFileReaderIo, ReadTwoContinuousFieldsWithIoMerged) { // change the array length to 64 so that bool field and int32 are continuous without // padding // read the bool field and the int32 field since the bool field's aligned offset diff --git a/cpp/src/arrow/ipc/reader.h b/cpp/src/arrow/ipc/reader.h index de4606094049c..888f59a627771 100644 --- a/cpp/src/arrow/ipc/reader.h +++ b/cpp/src/arrow/ipc/reader.h @@ -258,7 +258,7 @@ class ARROW_EXPORT Listener { virtual Status OnEOS(); /// \brief Called when a record batch is decoded and - /// OnRecordBatchWithMetadataDecoded() isn't overrided. + /// OnRecordBatchWithMetadataDecoded() isn't overridden. /// /// The default implementation just returns /// arrow::Status::NotImplemented(). 
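For readers unfamiliar with the Listener hook documented above, here is a minimal sketch of a subclass that overrides only OnRecordBatchDecoded(); since OnRecordBatchWithMetadataDecoded() is not overridden, decoded batches are routed to this method. CountingListener is an illustrative name, and the override signature is assumed to match the declaration in this header.

#include <memory>
#include "arrow/ipc/reader.h"
#include "arrow/record_batch.h"
#include "arrow/status.h"

// Counts decoded record batches; a real listener would consume them.
class CountingListener : public arrow::ipc::Listener {
 public:
  arrow::Status OnRecordBatchDecoded(
      std::shared_ptr<arrow::RecordBatch> batch) override {
    ++num_batches_;  // `batch` is the freshly decoded record batch
    return arrow::Status::OK();
  }

  int num_batches() const { return num_batches_; }

 private:
  int num_batches_ = 0;
};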
diff --git a/cpp/src/arrow/json/converter_test.cc b/cpp/src/arrow/json/converter_test.cc index cfc44c99976d5..fa85e704bc5e3 100644 --- a/cpp/src/arrow/json/converter_test.cc +++ b/cpp/src/arrow/json/converter_test.cc @@ -39,7 +39,7 @@ Result> Convert(std::shared_ptr type, return converted; } -// bool, null are trivial pass throughs +// bool, null are trivial pass-throughs TEST(ConverterTest, Integers) { for (auto int_type : {int8(), int16(), int32(), int64()}) { diff --git a/cpp/src/arrow/json/reader.h b/cpp/src/arrow/json/reader.h index 7776cb0b7d8a0..b7849a83ba1f8 100644 --- a/cpp/src/arrow/json/reader.h +++ b/cpp/src/arrow/json/reader.h @@ -79,7 +79,7 @@ class ARROW_EXPORT StreamingReader : public RecordBatchReader { /// threading is disabled, this will block until completion. virtual Future> ReadNextAsync() = 0; - /// Get the number of bytes which have been succesfully converted to record batches + /// Get the number of bytes which have been successfully converted to record batches /// and consumed [[nodiscard]] virtual int64_t bytes_processed() const = 0; diff --git a/cpp/src/arrow/table_test.cc b/cpp/src/arrow/table_test.cc index 3949caa402846..5f6905ce672d2 100644 --- a/cpp/src/arrow/table_test.cc +++ b/cpp/src/arrow/table_test.cc @@ -179,7 +179,7 @@ TEST_F(TestTable, Equals) { other = Table::Make(schema_, other_columns); ASSERT_FALSE(table_->Equals(*other)); - // Differring schema metadata + // Differing schema metadata other_schema = schema_->WithMetadata(::arrow::key_value_metadata({"key"}, {"value"})); other = Table::Make(other_schema, columns_); ASSERT_TRUE(table_->Equals(*other)); @@ -635,8 +635,8 @@ TEST_F(TestTable, SelectColumns) { ASSERT_OK_AND_ASSIGN(auto subset, table->SelectColumns({0, 2})); ASSERT_OK(subset->ValidateFull()); - auto expexted_schema = ::arrow::schema({schema_->field(0), schema_->field(2)}); - auto expected = Table::Make(expexted_schema, {table->column(0), table->column(2)}); + auto expected_schema = ::arrow::schema({schema_->field(0), schema_->field(2)}); + auto expected = Table::Make(expected_schema, {table->column(0), table->column(2)}); ASSERT_TRUE(subset->Equals(*expected)); // Out of bounds indices diff --git a/cpp/src/arrow/testing/util.cc b/cpp/src/arrow/testing/util.cc index e8a782575e278..36351fa8595be 100644 --- a/cpp/src/arrow/testing/util.cc +++ b/cpp/src/arrow/testing/util.cc @@ -198,7 +198,7 @@ std::string GetListenAddress() { ss << "." << byte; } #else - // On MacOS, only 127.0.0.1 is a valid loopback address by default. + // On macOS, only 127.0.0.1 is a valid loopback address by default. 
ss << "127.0.0.1"; #endif // Append port number diff --git a/cpp/src/arrow/type_test.cc b/cpp/src/arrow/type_test.cc index 009e557f82f68..22913f77fbfc1 100644 --- a/cpp/src/arrow/type_test.cc +++ b/cpp/src/arrow/type_test.cc @@ -612,7 +612,7 @@ TEST_F(TestSchema, TestMetadataConstruction) { AssertSchemaEqual(schema2, schema1); AssertSchemaNotEqual(schema2, schema1, /*check_metadata=*/true); - // Field has different metatadata + // Field has different metadata AssertSchemaEqual(schema2, schema3); AssertSchemaNotEqual(schema2, schema3, /*check_metadata=*/true); diff --git a/cpp/src/arrow/util/align_util.h b/cpp/src/arrow/util/align_util.h index 63df63749cf5c..71920e49f4aa2 100644 --- a/cpp/src/arrow/util/align_util.h +++ b/cpp/src/arrow/util/align_util.h @@ -74,7 +74,7 @@ namespace util { /// \brief Special alignment value to use data type-specific alignment /// /// If this is passed as the `alignment` in one of the CheckAlignment or EnsureAlignment -/// functions, then the function will ensure ensure each buffer is suitably aligned +/// functions, then the function will ensure each buffer is suitably aligned /// for the data type of the array. For example, given an int32 buffer the values /// buffer's address must be a multiple of 4. Given a large_string buffer the offsets /// buffer's address must be a multiple of 8. diff --git a/cpp/src/arrow/util/async_generator.h b/cpp/src/arrow/util/async_generator.h index a06be707f2fb0..f9bcd534567c6 100644 --- a/cpp/src/arrow/util/async_generator.h +++ b/cpp/src/arrow/util/async_generator.h @@ -715,7 +715,7 @@ AsyncGenerator MakeSerialReadaheadGenerator(AsyncGenerator source_generato /// generator() once before it returns. The returned generator will otherwise /// mirror the source. /// -/// This generator forwards aysnc-reentrant pressure to the source +/// This generator forwards async-reentrant pressure to the source /// This generator buffers one item (the first result) until it is delivered. template AsyncGenerator MakeAutoStartingGenerator(AsyncGenerator generator) { @@ -1843,7 +1843,7 @@ constexpr int kDefaultBackgroundQRestart = 16; /// active background thread task at any given time. You MUST transfer away from this /// background generator. Otherwise there could be a race condition if a callback on the /// background thread deletes the last consumer reference to the background generator. You -/// can transfer onto the same executor as the background thread, it is only neccesary to +/// can transfer onto the same executor as the background thread, it is only necessary to /// create a new thread task, not to switch executors. /// /// This generator is not async-reentrant diff --git a/cpp/src/arrow/util/async_generator_test.cc b/cpp/src/arrow/util/async_generator_test.cc index 7fb99f167c605..2b74313db279b 100644 --- a/cpp/src/arrow/util/async_generator_test.cc +++ b/cpp/src/arrow/util/async_generator_test.cc @@ -719,7 +719,7 @@ TEST_P(MergedGeneratorTestFixture, MergedStress) { sources.push_back(source); } AsyncGenerator> source_gen = util::AsyncVectorIt(sources); - auto outer_gaurd = ExpectNotAccessedReentrantly(&source_gen); + auto outer_guard = ExpectNotAccessedReentrantly(&source_gen); auto merged = MakeMergedGenerator(source_gen, 4); ASSERT_FINISHES_OK_AND_ASSIGN(auto items, CollectAsyncGenerator(merged)); @@ -1095,7 +1095,7 @@ TEST_P(BackgroundGeneratorTestFixture, BadResult) { ASSERT_FINISHES_OK_AND_EQ(TestInt(1), generator()); // Next three results may or may not be valid. 
// The typical case is the call for TestInt(2) restarts a full queue and then maybe - // TestInt(3) and TestInt(4) arrive quickly enough to not get pre-empted or maybe + // TestInt(3) and TestInt(4) arrive quickly enough to not get preempted or maybe // they don't. // // A more bizarre, but possible, case is the checking thread falls behind the producer diff --git a/cpp/src/arrow/util/benchmark_util.h b/cpp/src/arrow/util/benchmark_util.h index 2a3dcf56f88bf..75639ac11ae41 100644 --- a/cpp/src/arrow/util/benchmark_util.h +++ b/cpp/src/arrow/util/benchmark_util.h @@ -161,7 +161,7 @@ class MemoryPoolMemoryManager : public benchmark::MemoryManager { int64_t new_default_allocations = default_pool->num_allocations() - global_allocations_start; - // Only record metrics metrics if (1) there were allocations and (2) we + // Only record metrics if (1) there were allocations and (2) we // recorded at least one. if (new_default_allocations > 0 && memory_pool->num_allocations() > 0) { if (new_default_allocations > memory_pool->num_allocations()) { diff --git a/cpp/src/arrow/util/bit_block_counter.h b/cpp/src/arrow/util/bit_block_counter.h index f77cc3193624c..73a1ee8600fb4 100644 --- a/cpp/src/arrow/util/bit_block_counter.h +++ b/cpp/src/arrow/util/bit_block_counter.h @@ -200,7 +200,7 @@ class ARROW_EXPORT BitBlockCounter { int64_t offset_; }; -/// \brief A tool to iterate through a possibly non-existent validity bitmap, +/// \brief A tool to iterate through a possibly nonexistent validity bitmap, /// to allow us to write one code path for both the with-nulls and no-nulls /// cases without giving up a lot of performance. class ARROW_EXPORT OptionalBitBlockCounter { diff --git a/cpp/src/arrow/util/bit_util_test.cc b/cpp/src/arrow/util/bit_util_test.cc index 15eadc9f2e7ea..e026dfec24065 100644 --- a/cpp/src/arrow/util/bit_util_test.cc +++ b/cpp/src/arrow/util/bit_util_test.cc @@ -924,7 +924,7 @@ TEST(FirstTimeBitmapWriter, AppendWordOffsetOverwritesCorrectBitsOnExistingByte) writer.Finish(); EXPECT_EQ(BitmapToString(valid_bits, kBitsAfterAppend), expected_bits); }; - // 0ffset zero would not be a valid mask. + // Offset zero would not be a valid mask. check_with_set("11111111", 1); check_with_set("10111111", 2); check_with_set("10011111", 3); diff --git a/cpp/src/arrow/util/bitmap_reader.h b/cpp/src/arrow/util/bitmap_reader.h index 89006ba887b29..5526c87dbcaf2 100644 --- a/cpp/src/arrow/util/bitmap_reader.h +++ b/cpp/src/arrow/util/bitmap_reader.h @@ -256,7 +256,7 @@ class BitmapWordReader { } }; -/// \brief Index into a possibly non-existent bitmap +/// \brief Index into a possibly nonexistent bitmap struct OptionalBitIndexer { const uint8_t* bitmap; const int64_t offset; diff --git a/cpp/src/arrow/util/byte_stream_split_internal.h b/cpp/src/arrow/util/byte_stream_split_internal.h index ae85e2cfa81a3..4bc732ec24313 100644 --- a/cpp/src/arrow/util/byte_stream_split_internal.h +++ b/cpp/src/arrow/util/byte_stream_split_internal.h @@ -298,7 +298,7 @@ void ByteStreamSplitEncodeAvx2(const uint8_t* raw_values, const size_t num_value } // Path for float. - // 1. Processed hierarchically to 32i blcok using the unpack intrinsics. + // 1. Processed hierarchically to 32i block using the unpack intrinsics. // 2. Pack 128i block using _mm256_permutevar8x32_epi32. // 3. Pack final 256i block with _mm256_permute2x128_si256. 
constexpr size_t kNumUnpack = 3U; @@ -534,7 +534,7 @@ void ByteStreamSplitEncodeAvx512(const uint8_t* raw_values, const size_t num_val final_result[7] = _mm512_shuffle_i32x4(shuffle[6], shuffle[7], 0b11011101); } else { // Path for float. - // 1. Processed hierarchically to 32i blcok using the unpack intrinsics. + // 1. Processed hierarchically to 32i block using the unpack intrinsics. // 2. Pack 128i block using _mm256_permutevar8x32_epi32. // 3. Pack final 256i block with _mm256_permute2x128_si256. for (size_t i = 0; i < kNumStreams; ++i) diff --git a/cpp/src/arrow/util/byte_stream_split_test.cc b/cpp/src/arrow/util/byte_stream_split_test.cc index 3ea27f57da881..c98f0a086738b 100644 --- a/cpp/src/arrow/util/byte_stream_split_test.cc +++ b/cpp/src/arrow/util/byte_stream_split_test.cc @@ -49,8 +49,8 @@ struct NamedFunc { }; // A simplistic reference implementation for validation -void RefererenceByteStreamSplitEncode(const uint8_t* src, int width, - const int64_t num_values, uint8_t* dest) { +void ReferenceByteStreamSplitEncode(const uint8_t* src, int width, + const int64_t num_values, uint8_t* dest) { for (int64_t i = 0; i < num_values; ++i) { for (int stream = 0; stream < width; ++stream) { dest[stream * num_values + i] = *src++; @@ -129,7 +129,7 @@ class TestByteStreamSplitSpecialized : public ::testing::Test { protected: static void ReferenceEncode(const uint8_t* raw_values, const int64_t num_values, uint8_t* output_buffer_raw) { - RefererenceByteStreamSplitEncode(raw_values, kWidth, num_values, output_buffer_raw); + ReferenceByteStreamSplitEncode(raw_values, kWidth, num_values, output_buffer_raw); } static std::vector MakeRandomInput(int64_t num_values) { diff --git a/cpp/src/arrow/util/decimal.cc b/cpp/src/arrow/util/decimal.cc index 13709aa2f0cde..ce71def497161 100644 --- a/cpp/src/arrow/util/decimal.cc +++ b/cpp/src/arrow/util/decimal.cc @@ -312,7 +312,7 @@ struct Decimal128RealConversion return x; } - /// An appoximate conversion from Decimal128 to Real that guarantees: + /// An approximate conversion from Decimal128 to Real that guarantees: /// 1. If the decimal is an integer, the conversion is exact. /// 2. If the number of fractional digits is <= RealTraits::kMantissaDigits (e.g. /// 8 for float and 16 for double), the conversion is within 1 ULP of the exact @@ -1006,7 +1006,7 @@ struct Decimal256RealConversion return x; } - /// An appoximate conversion from Decimal256 to Real that guarantees: + /// An approximate conversion from Decimal256 to Real that guarantees: /// 1. If the decimal is an integer, the conversion is exact. /// 2. If the number of fractional digits is <= RealTraits::kMantissaDigits (e.g. 
/// 8 for float and 16 for double), the conversion is within 1 ULP of the exact diff --git a/cpp/src/arrow/util/future.h b/cpp/src/arrow/util/future.h index 17c641c31c53d..283b581a5100a 100644 --- a/cpp/src/arrow/util/future.h +++ b/cpp/src/arrow/util/future.h @@ -435,7 +435,7 @@ class [[nodiscard]] Future { return MakeFinished(E::ToResult(std::move(s))); } - struct WrapResultyOnComplete { + struct WrapResultOnComplete { template struct Callback { void operator()(const FutureImpl& impl) && { @@ -461,7 +461,7 @@ class [[nodiscard]] Future { template using WrapOnComplete = typename std::conditional< detail::first_arg_is_status::value, WrapStatusyOnComplete, - WrapResultyOnComplete>::type::template Callback; + WrapResultOnComplete>::type::template Callback; /// \brief Consumer API: Register a callback to run when this future completes /// diff --git a/cpp/src/arrow/util/int_util.h b/cpp/src/arrow/util/int_util.h index 5ce9dc2820ee1..59a2ac7109a3c 100644 --- a/cpp/src/arrow/util/int_util.h +++ b/cpp/src/arrow/util/int_util.h @@ -113,7 +113,7 @@ Status CheckIntegersInRange(const ArraySpan& values, const Scalar& bound_lower, ARROW_EXPORT Status IntegersCanFit(const ArraySpan& values, const DataType& target_type); -/// \brief Convenience for boundschecking a single Scalar vlue +/// \brief Convenience for boundschecking a single Scalar value ARROW_EXPORT Status IntegersCanFit(const Scalar& value, const DataType& target_type); diff --git a/cpp/src/arrow/util/io_util.cc b/cpp/src/arrow/util/io_util.cc index ac92618ff6603..751ef28d415e0 100644 --- a/cpp/src/arrow/util/io_util.cc +++ b/cpp/src/arrow/util/io_util.cc @@ -1466,7 +1466,7 @@ Status MemoryMapRemap(void* addr, size_t old_size, size_t new_size, int fildes, return StatusFromMmapErrno("ftruncate failed"); } // we set READ / WRITE flags on the new map, since we could only have - // unlarged a RW map in the first place + // enlarged a RW map in the first place *new_addr = mmap(NULL, new_size, PROT_READ | PROT_WRITE, MAP_SHARED, fildes, 0); if (*new_addr == MAP_FAILED) { return StatusFromMmapErrno("mmap failed"); diff --git a/cpp/src/arrow/util/iterator.h b/cpp/src/arrow/util/iterator.h index 0eae7f6a8571b..5e716d0fd113d 100644 --- a/cpp/src/arrow/util/iterator.h +++ b/cpp/src/arrow/util/iterator.h @@ -50,7 +50,7 @@ struct IterationTraits { static T End() { return T(NULLPTR); } /// \brief Checks to see if the value is a terminal value. 
- /// A method is used here since T is not neccesarily comparable in many + /// A method is used here since T is not necessarily comparable in many /// cases even though it has a distinct final value static bool IsEnd(const T& val) { return val == End(); } }; diff --git a/cpp/src/arrow/util/list_util.h b/cpp/src/arrow/util/list_util.h index 467f4eb15edb7..58deb8019d941 100644 --- a/cpp/src/arrow/util/list_util.h +++ b/cpp/src/arrow/util/list_util.h @@ -40,7 +40,7 @@ ARROW_EXPORT Result> RangeOfValuesUsed( /// /// This is usually the same as the length of the RangeOfValuesUsed() range, but /// it can be: -/// - Smaller: when the child array constains many values that are not +/// - Smaller: when the child array contains many values that are not /// referenced by the lists or list-views in the parent array /// - Greater: when the list-views share child array ranges /// diff --git a/cpp/src/arrow/util/logging.cc b/cpp/src/arrow/util/logging.cc index 6d275fa2864a2..9c68982a3d59f 100644 --- a/cpp/src/arrow/util/logging.cc +++ b/cpp/src/arrow/util/logging.cc @@ -148,7 +148,7 @@ void ArrowLog::StartArrowLog(const std::string& app_name, #ifdef ARROW_USE_GLOG int mapped_severity_threshold = GetMappedSeverity(severity_threshold_); google::SetStderrLogging(mapped_severity_threshold); - // Enble log file if log_dir is not empty. + // Enable log file if log_dir is not empty. if (!log_dir.empty()) { auto dir_ends_with_slash = log_dir; if (log_dir[log_dir.length() - 1] != '/') { diff --git a/cpp/src/arrow/util/ree_util.cc b/cpp/src/arrow/util/ree_util.cc index 819de5eb60c63..83fb4d3a9a738 100644 --- a/cpp/src/arrow/util/ree_util.cc +++ b/cpp/src/arrow/util/ree_util.cc @@ -69,7 +69,7 @@ int64_t FindPhysicalIndexImpl(PhysicalIndexFinder& self, int64_t i) DCHECK_LT(i, self.array_span.length); const int64_t run_ends_size = ree_util::RunEndsArray(self.array_span).length; DCHECK_LT(self.last_physical_index, run_ends_size); - // This access to self.run_ends[last_physical_index] is alwas safe because: + // This access to self.run_ends[last_physical_index] is always safe because: // 1. 0 <= i < array_span.length() implies there is at least one run and the initial // value 0 will be safe to index with. // 2. last_physical_index > 0 is always the result of a valid call to diff --git a/cpp/src/arrow/util/ree_util.h b/cpp/src/arrow/util/ree_util.h index 2b7940154a50b..a3e745ba830a3 100644 --- a/cpp/src/arrow/util/ree_util.h +++ b/cpp/src/arrow/util/ree_util.h @@ -128,7 +128,7 @@ int64_t FindPhysicalIndex(const ArraySpan& span, int64_t i, int64_t absolute_off /// run-ends) necessary to represent the logical range of values from /// offset to length. /// -/// Avoid calling this function if the physical length can be estabilished in +/// Avoid calling this function if the physical length can be established in /// some other way (e.g. when iterating over the runs sequentially until the /// end). This function uses binary-search, so it has a O(log N) cost. template @@ -217,7 +217,7 @@ ARROW_EXPORT int64_t FindPhysicalIndex(const ArraySpan& span, int64_t i, /// run-ends) necessary to represent the logical range of values from /// offset to length. /// -/// Avoid calling this function if the physical length can be estabilished in +/// Avoid calling this function if the physical length can be established in /// some other way (e.g. when iterating over the runs sequentially until the /// end). This function uses binary-search, so it has a O(log N) cost. 
ARROW_EXPORT int64_t FindPhysicalLength(const ArraySpan& span); diff --git a/cpp/src/arrow/util/ree_util_test.cc b/cpp/src/arrow/util/ree_util_test.cc index 966cbd8f386f1..08a6a39b98d5c 100644 --- a/cpp/src/arrow/util/ree_util_test.cc +++ b/cpp/src/arrow/util/ree_util_test.cc @@ -101,7 +101,7 @@ TYPED_TEST_P(ReeUtilTest, PhysicalLength) { ASSERT_EQ(internal::FindPhysicalLength(run_ends246, 4, 0, 7), 0); } -TYPED_TEST_P(ReeUtilTest, MergedRunsInterator) { +TYPED_TEST_P(ReeUtilTest, MergedRunsIteratorTest) { // Construct the following two test arrays with a lot of different offsets to test the // REE iterator: left: // @@ -387,7 +387,7 @@ TYPED_TEST_P(ReeUtilTest, MergedRunsInterator) { } REGISTER_TYPED_TEST_SUITE_P(ReeUtilTest, PhysicalIndex, PhysicalLength, - MergedRunsInterator); + MergedRunsIteratorTest); using RunEndsTypes = testing::Types; INSTANTIATE_TYPED_TEST_SUITE_P(ReeUtilTest, ReeUtilTest, RunEndsTypes); diff --git a/cpp/src/arrow/util/rle_encoding.h b/cpp/src/arrow/util/rle_encoding.h index 3a517d24b46c6..e0f5690062a04 100644 --- a/cpp/src/arrow/util/rle_encoding.h +++ b/cpp/src/arrow/util/rle_encoding.h @@ -61,7 +61,7 @@ namespace util { /// on a byte boundary without padding. /// Given that we know it is a multiple of 8, we store the number of 8-groups rather than /// the actual number of encoded ints. (This means that the total number of encoded values -/// can not be determined from the encoded data, since the number of values in the last +/// cannot be determined from the encoded data, since the number of values in the last /// group may not be a multiple of 8). For the last group of literal runs, we pad /// the group to 8 with zeros. This allows for 8 at a time decoding on the read side /// without the need for additional checks. diff --git a/cpp/src/arrow/util/string_test.cc b/cpp/src/arrow/util/string_test.cc index 5f8054f12161f..f222b938d5a32 100644 --- a/cpp/src/arrow/util/string_test.cc +++ b/cpp/src/arrow/util/string_test.cc @@ -136,7 +136,7 @@ TEST(SplitString, OuterLeftAndOuterRightDelimiter) { EXPECT_EQ(parts[4], ""); } -TEST(SplitString, OnlyDemiliter) { +TEST(SplitString, OnlyDelimiter) { std::string input = ":"; auto parts = SplitString(input, ':'); ASSERT_EQ(parts.size(), 2); diff --git a/cpp/src/arrow/util/tdigest.cc b/cpp/src/arrow/util/tdigest.cc index ee84a5ef6b2f5..ca864d98361b4 100644 --- a/cpp/src/arrow/util/tdigest.cc +++ b/cpp/src/arrow/util/tdigest.cc @@ -341,7 +341,7 @@ class TDigest::TDigestImpl { double total_weight() const { return total_weight_; } private: - // must be delcared before merger_, see constructor initialization list + // must be declared before merger_, see constructor initialization list const uint32_t delta_; TDigestMerger<> merger_; diff --git a/cpp/src/arrow/util/tdigest_test.cc b/cpp/src/arrow/util/tdigest_test.cc index f501285b34f5a..63395b676a61f 100644 --- a/cpp/src/arrow/util/tdigest_test.cc +++ b/cpp/src/arrow/util/tdigest_test.cc @@ -44,7 +44,7 @@ TEST(TDigestTest, SingleValue) { TDigest td; td.Add(value); ASSERT_OK(td.Validate()); - // all quantiles equal to same single vaue + // all quantiles equal to same single value for (double q = 0; q <= 1; q += 0.1) { EXPECT_EQ(td.Quantile(q), value); } diff --git a/cpp/src/arrow/util/thread_pool_test.cc b/cpp/src/arrow/util/thread_pool_test.cc index ad30ca2e8052d..8f43bb8dec367 100644 --- a/cpp/src/arrow/util/thread_pool_test.cc +++ b/cpp/src/arrow/util/thread_pool_test.cc @@ -699,7 +699,7 @@ TEST_F(TestThreadPool, SetCapacity) { } ASSERT_OK(gating_task->WaitForRunning(3)); 
SleepFor(0.001); // Sleep a bit just to make sure it isn't making any threads - ASSERT_EQ(pool->GetActualCapacity(), 3); // maxxed out + ASSERT_EQ(pool->GetActualCapacity(), 3); // maxed out // The tasks have not finished yet, increasing the desired capacity // should spawn threads immediately. diff --git a/cpp/src/arrow/vendored/datetime/date.h b/cpp/src/arrow/vendored/datetime/date.h index fd2569c6de0f6..e4a216ee2bd93 100644 --- a/cpp/src/arrow/vendored/datetime/date.h +++ b/cpp/src/arrow/vendored/datetime/date.h @@ -6016,7 +6016,7 @@ to_stream(std::basic_ostream& os, const CharT* fmt, { if (offset_sec == nullptr) { - // Can not format %z with unknown offset + // Cannot format %z with unknown offset os.setstate(ios::failbit); return os; } @@ -6050,7 +6050,7 @@ to_stream(std::basic_ostream& os, const CharT* fmt, { if (abbrev == nullptr) { - // Can not format %Z with unknown time_zone + // Cannot format %Z with unknown time_zone os.setstate(ios::failbit); return os; } diff --git a/cpp/src/arrow/vendored/datetime/tz.cpp b/cpp/src/arrow/vendored/datetime/tz.cpp index 6962a8b3c3572..0097a2c2eb5d9 100644 --- a/cpp/src/arrow/vendored/datetime/tz.cpp +++ b/cpp/src/arrow/vendored/datetime/tz.cpp @@ -118,7 +118,7 @@ #include #include -// unistd.h is used on some platforms as part of the the means to get +// unistd.h is used on some platforms as part of the means to get // the current time zone. On Win32 windows.h provides a means to do it. // gcc/mingw supports unistd.h on Win32 but MSVC does not. @@ -643,7 +643,7 @@ bool native_to_standard_timezone_name(const std::string& native_tz_name, std::string& standard_tz_name) { - // TOOD! Need be a case insensitive compare? + // TODO! Need be a case-insensitive compare? if (native_tz_name == "UTC") { standard_tz_name = "Etc/UTC"; @@ -3941,7 +3941,7 @@ tzdb::current_zone() const // On some OS's a file called /etc/localtime may // exist and it may be either a real file // containing time zone details or a symlink to such a file. - // On MacOS and BSD Unix if this file is a symlink it + // On macOS and BSD Unix if this file is a symlink it // might resolve to a path like this: // "/usr/share/zoneinfo/America/Los_Angeles" // If it does, we try to determine the current diff --git a/cpp/src/arrow/vendored/datetime/tz.h b/cpp/src/arrow/vendored/datetime/tz.h index 467db6d199793..fe217101db7ff 100644 --- a/cpp/src/arrow/vendored/datetime/tz.h +++ b/cpp/src/arrow/vendored/datetime/tz.h @@ -71,7 +71,7 @@ #endif static_assert(!(USE_OS_TZDB && HAS_REMOTE_API), - "USE_OS_TZDB and HAS_REMOTE_API can not be used together"); + "USE_OS_TZDB and HAS_REMOTE_API cannot be used together"); #ifdef __clang__ # pragma clang diagnostic pop @@ -82,7 +82,7 @@ static_assert(!(USE_OS_TZDB && HAS_REMOTE_API), #endif static_assert(HAS_REMOTE_API == 0 ? AUTO_DOWNLOAD == 0 : true, - "AUTO_DOWNLOAD can not be turned on without HAS_REMOTE_API"); + "AUTO_DOWNLOAD cannot be turned on without HAS_REMOTE_API"); #ifndef USE_SHELL_API # define USE_SHELL_API 1 @@ -90,7 +90,7 @@ static_assert(HAS_REMOTE_API == 0 ? 
AUTO_DOWNLOAD == 0 : true,
#if USE_OS_TZDB
# ifdef _WIN32
-# error "USE_OS_TZDB can not be used on Windows"
+# error "USE_OS_TZDB cannot be used on Windows"
# endif
#endif
diff --git a/cpp/src/arrow/vendored/double-conversion/fast-dtoa.h b/cpp/src/arrow/vendored/double-conversion/fast-dtoa.h
index ddd0f04dcf02c..05266cbaf1c24 100644
--- a/cpp/src/arrow/vendored/double-conversion/fast-dtoa.h
+++ b/cpp/src/arrow/vendored/double-conversion/fast-dtoa.h
@@ -57,7 +57,7 @@ static const int kFastDtoaMaximalSingleLength = 9;
// Precondition:
// * v must be a strictly positive finite double.
//
-// Returns true if it succeeds, otherwise the result can not be trusted.
+// Returns true if it succeeds, otherwise the result cannot be trusted.
// There will be *length digits inside the buffer followed by a null terminator.
// If the function returns true and mode equals
// - FAST_DTOA_SHORTEST, then
diff --git a/cpp/src/arrow/vendored/fast_float/decimal_to_binary.h b/cpp/src/arrow/vendored/fast_float/decimal_to_binary.h
index 9390228c3946a..961f26fa564bf 100644
--- a/cpp/src/arrow/vendored/fast_float/decimal_to_binary.h
+++ b/cpp/src/arrow/vendored/fast_float/decimal_to_binary.h
@@ -157,7 +157,7 @@ adjusted_mantissa compute_float(int64_t q, uint64_t w) noexcept {
return answer;
}
- // usually, we round *up*, but if we fall right in between and and we have an
+ // usually, we round *up*, but if we fall right in between and we have an
// even basis, we need to round down
// We are only concerned with the cases where 5**q fits in single 64-bit word.
if ((product.low <= 1) && (q >= binary::min_exponent_round_to_even()) && (q <= binary::max_exponent_round_to_even()) &&
diff --git a/cpp/src/arrow/vendored/fast_float/float_common.h b/cpp/src/arrow/vendored/fast_float/float_common.h
index 717320126750c..5011cf4ced011 100644
--- a/cpp/src/arrow/vendored/fast_float/float_common.h
+++ b/cpp/src/arrow/vendored/fast_float/float_common.h
@@ -137,7 +137,7 @@ fastfloat_really_inline constexpr bool cpp20_and_in_constexpr() {
#endif
}
-// Compares two ASCII strings in a case insensitive manner.
+// Compares two ASCII strings in a case-insensitive manner.
inline FASTFLOAT_CONSTEXPR14 bool
fastfloat_strncasecmp(const char *input1, const char *input2, size_t length) {
char running_diff{0};
diff --git a/cpp/src/arrow/vendored/pcg/pcg_extras.hpp b/cpp/src/arrow/vendored/pcg/pcg_extras.hpp
index 36576cfa91d8c..c58d33fd63c57 100644
--- a/cpp/src/arrow/vendored/pcg/pcg_extras.hpp
+++ b/cpp/src/arrow/vendored/pcg/pcg_extras.hpp
@@ -502,7 +502,7 @@ void generate_to_impl(SeedSeq&& generator, DestIter dest,
? size * ((DEST_SIZE+GEN_SIZE-1) / GEN_SIZE)
: (size + (GEN_SIZE / DEST_SIZE) - 1)
/ ((GEN_SIZE / DEST_SIZE) + GEN_IS_SMALLER);
- // this odd code ^^^^^^^^^^^^^^^^^ is work-around for
+ // this odd code ^^^^^^^^^^^^^^^^^ is a workaround for
// a bug: http://llvm.org/bugs/show_bug.cgi?id=21287
if (FROM_ELEMS <= 1024) {
diff --git a/cpp/src/arrow/vendored/xxhash/xxhash.h b/cpp/src/arrow/vendored/xxhash/xxhash.h
index a18e8c762daaa..de810c7b2e132 100644
--- a/cpp/src/arrow/vendored/xxhash/xxhash.h
+++ b/cpp/src/arrow/vendored/xxhash/xxhash.h
@@ -1443,7 +1443,7 @@ struct XXH32_state_s {
XXH32_hash_t v[4]; /*!< Accumulator lanes */
XXH32_hash_t mem32[4]; /*!< Internal buffer for partial reads. Treated as unsigned char[16]. */
XXH32_hash_t memsize; /*!< Amount of data in @ref mem32 */
- XXH32_hash_t reserved; /*!< Reserved field. Do not read nor write to it. */
+ XXH32_hash_t reserved; /*!< Reserved field.
Do not read from nor write to it. */ }; /* typedef'd to XXH32_state_t */ diff --git a/cpp/src/gandiva/date_utils.cc b/cpp/src/gandiva/date_utils.cc index f0a80d3c95921..9d9f500f1262e 100644 --- a/cpp/src/gandiva/date_utils.cc +++ b/cpp/src/gandiva/date_utils.cc @@ -26,7 +26,7 @@ namespace gandiva { std::vector DateUtils::GetMatches(std::string pattern, bool exactMatch) { - // we are case insensitive + // we are case-insensitive std::transform(pattern.begin(), pattern.end(), pattern.begin(), ::tolower); std::vector matches; diff --git a/cpp/src/gandiva/expr_decomposer.cc b/cpp/src/gandiva/expr_decomposer.cc index 42566ca035159..df8eed5fd6316 100644 --- a/cpp/src/gandiva/expr_decomposer.cc +++ b/cpp/src/gandiva/expr_decomposer.cc @@ -248,7 +248,7 @@ Status ExprDecomposer::Visit(const LiteralNode& node) { return Status::OK(); } -// The bolow functions use a stack to detect : +// The below functions use a stack to detect : // a. nested if-else expressions. // In such cases, the local bitmap can be re-used. // b. detect terminal else expressions diff --git a/cpp/src/gandiva/expr_decomposer_test.cc b/cpp/src/gandiva/expr_decomposer_test.cc index 7681d9e646297..194c13bc82c86 100644 --- a/cpp/src/gandiva/expr_decomposer_test.cc +++ b/cpp/src/gandiva/expr_decomposer_test.cc @@ -333,7 +333,7 @@ TEST_F(TestExprDecomposer, TestComplexIfCondition) { int idx_cond_a_inner_if = decomposer.PushThenEntry(cond_node_a_inner_if, true); EXPECT_EQ(idx_cond_a_inner_if, - 0); // expect bitmap to be resused since nested if else + 0); // expect bitmap to be reused since nested if else decomposer.PopThenEntry(cond_node_a_inner_if); decomposer.PushElseEntry(cond_node_a_inner_if, idx_cond_a_inner_if); @@ -363,7 +363,7 @@ TEST_F(TestExprDecomposer, TestComplexIfCondition) { int idx_then_a_inner_if = decomposer.PushThenEntry(then_node_a_inner_if, true); EXPECT_EQ(idx_then_a_inner_if, - 2); // expect bitmap to be resused since nested if else + 2); // expect bitmap to be reused since nested if else decomposer.PopThenEntry(then_node_a_inner_if); decomposer.PushElseEntry(then_node_a_inner_if, idx_then_a_inner_if); @@ -392,7 +392,7 @@ TEST_F(TestExprDecomposer, TestComplexIfCondition) { int idx_else_a_inner_if = decomposer.PushThenEntry(else_node_a_inner_if, true); EXPECT_EQ(idx_else_a_inner_if, - 1); // expect bitmap to be resused since nested if else + 1); // expect bitmap to be reused since nested if else decomposer.PopThenEntry(else_node_a_inner_if); decomposer.PushElseEntry(else_node_a_inner_if, idx_else_a_inner_if); diff --git a/cpp/src/gandiva/expr_validator.cc b/cpp/src/gandiva/expr_validator.cc index 8a6f86e6f0419..cd76ffe08234e 100644 --- a/cpp/src/gandiva/expr_validator.cc +++ b/cpp/src/gandiva/expr_validator.cc @@ -79,7 +79,7 @@ Status ExprValidator::Visit(const FieldNode& node) { Status::ExpressionValidationError("Field ", node.field()->name(), " not in schema.")); - // Ensure that that the found field match. + // Ensure that the found field matches. 
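A self-contained sketch of the lowercase normalization the date_utils.cc hunk above relies on for case-insensitive matching; the unsigned char lambda is the usual guard against undefined behavior when plain char is signed. Illustrative only, not Gandiva's code:

#include <algorithm>
#include <cctype>
#include <iostream>
#include <string>

int main() {
  std::string pattern = "YYYY-MM-DD hh:mm:ss";
  // Normalize to lowercase so later comparisons are case-insensitive.
  std::transform(pattern.begin(), pattern.end(), pattern.begin(),
                 [](unsigned char c) { return static_cast<char>(std::tolower(c)); });
  std::cout << pattern << "\n";  // prints "yyyy-mm-dd hh:mm:ss"
  return 0;
}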
FieldPtr field_in_schema = field_in_schema_entry->second; ARROW_RETURN_IF(!field_in_schema->Equals(node.field()), Status::ExpressionValidationError( diff --git a/cpp/src/gandiva/expression_cache_key.h b/cpp/src/gandiva/expression_cache_key.h index db174d0642eef..e7522042a7d43 100644 --- a/cpp/src/gandiva/expression_cache_key.h +++ b/cpp/src/gandiva/expression_cache_key.h @@ -34,19 +34,19 @@ class ExpressionCacheKey { public: ExpressionCacheKey(SchemaPtr schema, std::shared_ptr configuration, ExpressionVector expression_vector, SelectionVector::Mode mode) - : schema_(schema), mode_(mode), uniqifier_(0), configuration_(configuration) { + : schema_(schema), mode_(mode), uniquifier_(0), configuration_(configuration) { static const int kSeedValue = 4; size_t result = kSeedValue; for (auto& expr : expression_vector) { std::string expr_as_string = expr->ToString(); expressions_as_strings_.push_back(expr_as_string); arrow::internal::hash_combine(result, expr_as_string); - UpdateUniqifier(expr_as_string); + UpdateUniquifier(expr_as_string); } arrow::internal::hash_combine(result, static_cast(mode)); arrow::internal::hash_combine(result, configuration->Hash()); arrow::internal::hash_combine(result, schema_->ToString()); - arrow::internal::hash_combine(result, uniqifier_); + arrow::internal::hash_combine(result, uniquifier_); hash_code_ = result; } @@ -54,25 +54,25 @@ class ExpressionCacheKey { Expression& expression) : schema_(schema), mode_(SelectionVector::MODE_NONE), - uniqifier_(0), + uniquifier_(0), configuration_(configuration) { static const int kSeedValue = 4; size_t result = kSeedValue; expressions_as_strings_.push_back(expression.ToString()); - UpdateUniqifier(expression.ToString()); + UpdateUniquifier(expression.ToString()); arrow::internal::hash_combine(result, configuration->Hash()); arrow::internal::hash_combine(result, schema_->ToString()); - arrow::internal::hash_combine(result, uniqifier_); + arrow::internal::hash_combine(result, uniquifier_); hash_code_ = result; } - void UpdateUniqifier(const std::string& expr) { - if (uniqifier_ == 0) { + void UpdateUniquifier(const std::string& expr) { + if (uniquifier_ == 0) { // caching of expressions with re2 patterns causes lock contention. So, use // multiple instances to reduce contention. 
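// (Illustrative gloss on the comment above: with the modulo-16 sharding below,
// two threads building a key for the same " like(" expression usually get
// different uniquifier_ values and therefore different cache keys, so each
// thread compiles and caches its own copy instead of all threads serializing
// on a single shared entry.)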
if (expr.find(" like(") != std::string::npos) { - uniqifier_ = std::hash()(std::this_thread::get_id()) % 16; + uniquifier_ = std::hash()(std::this_thread::get_id()) % 16; } } } @@ -100,7 +100,7 @@ class ExpressionCacheKey { return false; } - if (uniqifier_ != other.uniqifier_) { + if (uniquifier_ != other.uniquifier_) { return false; } @@ -114,7 +114,7 @@ class ExpressionCacheKey { SchemaPtr schema_; std::vector expressions_as_strings_; SelectionVector::Mode mode_; - uint32_t uniqifier_; + uint32_t uniquifier_; std::shared_ptr configuration_; }; diff --git a/cpp/src/gandiva/gdv_function_stubs.cc b/cpp/src/gandiva/gdv_function_stubs.cc index 0ad3c1738e835..bcef954a473ea 100644 --- a/cpp/src/gandiva/gdv_function_stubs.cc +++ b/cpp/src/gandiva/gdv_function_stubs.cc @@ -209,7 +209,7 @@ GANDIVA_EXPORT const char* gdv_fn_base64_encode_binary(int64_t context, const char* in, int32_t in_len, int32_t* out_len) { if (in_len < 0) { - gdv_fn_context_set_error_msg(context, "Buffer length can not be negative"); + gdv_fn_context_set_error_msg(context, "Buffer length cannot be negative"); *out_len = 0; return ""; } @@ -236,7 +236,7 @@ GANDIVA_EXPORT const char* gdv_fn_base64_decode_utf8(int64_t context, const char* in, int32_t in_len, int32_t* out_len) { if (in_len < 0) { - gdv_fn_context_set_error_msg(context, "Buffer length can not be negative"); + gdv_fn_context_set_error_msg(context, "Buffer length cannot be negative"); *out_len = 0; return ""; } @@ -743,17 +743,17 @@ int32_t gdv_fn_cast_intervalyear_utf8_int32(int64_t context_ptr, int64_t holder_ } GANDIVA_EXPORT -gdv_timestamp to_utc_timezone_timestamp(int64_t context, gdv_timestamp time_miliseconds, +gdv_timestamp to_utc_timezone_timestamp(int64_t context, gdv_timestamp time_milliseconds, const char* timezone, gdv_int32 length) { using arrow_vendored::date::locate_zone; using arrow_vendored::date::sys_time; using std::chrono::milliseconds; - sys_time tp{milliseconds{time_miliseconds}}; + sys_time tp{milliseconds{time_milliseconds}}; try { const auto local_tz = locate_zone(std::string(timezone, length)); gdv_timestamp offset = local_tz->get_info(tp).offset.count() * 1000; - return time_miliseconds - static_cast(offset); + return time_milliseconds - static_cast(offset); } catch (...) { std::string e_msg = std::string(timezone, length) + " is an invalid time zone name."; gdv_fn_context_set_error_msg(context, e_msg.c_str()); @@ -763,17 +763,17 @@ gdv_timestamp to_utc_timezone_timestamp(int64_t context, gdv_timestamp time_mili GANDIVA_EXPORT gdv_timestamp from_utc_timezone_timestamp(gdv_int64 context, - gdv_timestamp time_miliseconds, + gdv_timestamp time_milliseconds, const char* timezone, gdv_int32 length) { using arrow_vendored::date::sys_time; using arrow_vendored::date::zoned_time; using std::chrono::milliseconds; - const sys_time tp{milliseconds{time_miliseconds}}; + const sys_time tp{milliseconds{time_milliseconds}}; try { const zoned_time local_tz{std::string(timezone, length), tp}; gdv_timestamp offset = local_tz.get_time_zone()->get_info(tp).offset.count() * 1000; - return time_miliseconds + static_cast(offset); + return time_milliseconds + static_cast(offset); } catch (...) 
{ std::string e_msg = std::string(timezone, length) + " is an invalid time zone name."; gdv_fn_context_set_error_msg(context, e_msg.c_str()); diff --git a/cpp/src/gandiva/gdv_function_stubs.h b/cpp/src/gandiva/gdv_function_stubs.h index 3f52537ee05ca..8e87bc51215e1 100644 --- a/cpp/src/gandiva/gdv_function_stubs.h +++ b/cpp/src/gandiva/gdv_function_stubs.h @@ -342,11 +342,12 @@ const char* translate_utf8_utf8_utf8(int64_t context, const char* in, int32_t in int32_t to_len, int32_t* out_len); GANDIVA_EXPORT -gdv_timestamp to_utc_timezone_timestamp(int64_t context, gdv_timestamp time_miliseconds, +gdv_timestamp to_utc_timezone_timestamp(int64_t context, gdv_timestamp time_milliseconds, const char* timezone, int32_t length); GANDIVA_EXPORT -gdv_timestamp from_utc_timezone_timestamp(int64_t context, gdv_timestamp time_miliseconds, +gdv_timestamp from_utc_timezone_timestamp(int64_t context, + gdv_timestamp time_milliseconds, const char* timezone, int32_t length); GANDIVA_EXPORT diff --git a/cpp/src/gandiva/gdv_function_stubs_test.cc b/cpp/src/gandiva/gdv_function_stubs_test.cc index a8dfcd088ab17..3e403828a4cce 100644 --- a/cpp/src/gandiva/gdv_function_stubs_test.cc +++ b/cpp/src/gandiva/gdv_function_stubs_test.cc @@ -73,7 +73,7 @@ TEST(TestGdvFnStubs, TestCastVarbinaryNumeric) { EXPECT_FALSE(ctx.has_error()); gdv_fn_castVARBINARY_int32_int64(ctx_ptr, 347, -1, &out_len); - EXPECT_THAT(ctx.get_error(), ::testing::HasSubstr("Buffer length can not be negative")); + EXPECT_THAT(ctx.get_error(), ::testing::HasSubstr("Buffer length cannot be negative")); ctx.Reset(); // tests for big integer values as input @@ -122,7 +122,7 @@ TEST(TestGdvFnStubs, TestBase64Encode) { value = gdv_fn_base64_encode_binary(ctx_ptr, "test", -5, &out_len); out_value = std::string(value, out_len); EXPECT_EQ(out_value, ""); - EXPECT_THAT(ctx.get_error(), ::testing::HasSubstr("Buffer length can not be negative")); + EXPECT_THAT(ctx.get_error(), ::testing::HasSubstr("Buffer length cannot be negative")); ctx.Reset(); } @@ -151,7 +151,7 @@ TEST(TestGdvFnStubs, TestBase64Decode) { value = gdv_fn_base64_decode_utf8(ctx_ptr, "test", -5, &out_len); out_value = std::string(value, out_len); EXPECT_EQ(out_value, ""); - EXPECT_THAT(ctx.get_error(), ::testing::HasSubstr("Buffer length can not be negative")); + EXPECT_THAT(ctx.get_error(), ::testing::HasSubstr("Buffer length cannot be negative")); ctx.Reset(); } @@ -323,7 +323,7 @@ TEST(TestGdvFnStubs, TestCastVARCHARFromInt32) { EXPECT_FALSE(ctx.has_error()); out_str = gdv_fn_castVARCHAR_int32_int64(ctx_ptr, 347, -1, &out_len); - EXPECT_THAT(ctx.get_error(), ::testing::HasSubstr("Buffer length can not be negative")); + EXPECT_THAT(ctx.get_error(), ::testing::HasSubstr("Buffer length cannot be negative")); ctx.Reset(); } diff --git a/cpp/src/gandiva/gdv_string_function_stubs.cc b/cpp/src/gandiva/gdv_string_function_stubs.cc index 9f5b5ce64b4a9..17eefbe22e31b 100644 --- a/cpp/src/gandiva/gdv_string_function_stubs.cc +++ b/cpp/src/gandiva/gdv_string_function_stubs.cc @@ -84,7 +84,7 @@ const char* gdv_fn_regexp_extract_utf8_utf8_int32(int64_t ptr, int64_t holder_pt const char* gdv_fn_cast##CAST_NAME##_##IN_TYPE##_int64( \ int64_t context, gdv_##IN_TYPE value, int64_t len, int32_t * out_len) { \ if (len < 0) { \ - gdv_fn_context_set_error_msg(context, "Buffer length can not be negative"); \ + gdv_fn_context_set_error_msg(context, "Buffer length cannot be negative"); \ *out_len = 0; \ return ""; \ } \ @@ -120,7 +120,7 @@ const char* gdv_fn_regexp_extract_utf8_utf8_int32(int64_t ptr, int64_t 
holder_pt
const char* gdv_fn_cast##CAST_NAME##_##IN_TYPE##_int64( \
int64_t context, gdv_##IN_TYPE value, int64_t len, int32_t * out_len) { \
if (len < 0) { \
- gdv_fn_context_set_error_msg(context, "Buffer length can not be negative"); \
+ gdv_fn_context_set_error_msg(context, "Buffer length cannot be negative"); \
*out_len = 0; \
return ""; \
} \
diff --git a/cpp/src/gandiva/hash_utils_test.cc b/cpp/src/gandiva/hash_utils_test.cc
index 96f9819e53cc2..65385023324ac 100644
--- a/cpp/src/gandiva/hash_utils_test.cc
+++ b/cpp/src/gandiva/hash_utils_test.cc
@@ -50,7 +50,7 @@ TEST(TestShaHashUtils, TestSha1Numeric) {
std::string sha1_as_str(sha_1, out_length);
EXPECT_EQ(sha1_as_str.size(), sha1_size);
- // The value can not exists inside the set with the hash results
+ // The value cannot exist inside the set with the hash results
EXPECT_EQ(sha_values.find(sha1_as_str), sha_values.end());
sha_values.insert(sha1_as_str);
}
@@ -85,7 +85,7 @@ TEST(TestShaHashUtils, TestSha512Numeric) {
std::string sha512_as_str(sha_512, out_length);
EXPECT_EQ(sha512_as_str.size(), sha512_size);
- // The value can not exists inside the set with the hash results
+ // The value cannot exist inside the set with the hash results
EXPECT_EQ(sha_values.find(sha512_as_str), sha_values.end());
sha_values.insert(sha512_as_str);
}
@@ -120,7 +120,7 @@ TEST(TestShaHashUtils, TestSha256Numeric) {
std::string sha256_as_str(sha_256, out_length);
EXPECT_EQ(sha256_as_str.size(), sha256_size);
- // The value can not exists inside the set with the hash results
+ // The value cannot exist inside the set with the hash results
EXPECT_EQ(sha_values.find(sha256_as_str), sha_values.end());
sha_values.insert(sha256_as_str);
}
@@ -154,7 +154,7 @@ TEST(TestShaHashUtils, TestMD5Numeric) {
std::string md5_as_str(md5, out_length);
EXPECT_EQ(md5_as_str.size(), md5_size);
- // The value can not exists inside the set with the hash results
+ // The value cannot exist inside the set with the hash results
EXPECT_EQ(md5_values.find(md5_as_str), md5_values.end());
md5_values.insert(md5_as_str);
}
diff --git a/cpp/src/gandiva/interval_holder.cc b/cpp/src/gandiva/interval_holder.cc
index d63a11a10d341..a555da162e212 100644
--- a/cpp/src/gandiva/interval_holder.cc
+++ b/cpp/src/gandiva/interval_holder.cc
@@ -48,7 +48,7 @@ static const RE2 iso8601_period_without_time(
static const std::regex period_not_contains_time(R"(^((?!T).)*$)");
// pre-compiled pattern for matching periods in 8601 formats that contains weeks inside
-// them. The ISO8601 specification defines that if the string contains a week, it can not
+// them. The ISO8601 specification defines that if the string contains a week, it cannot
// have other time granularities information, like day, years and months.
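// (Illustrative examples of the rule just described: "P3W" and "P2,5W" are
// valid ISO 8601 week periods, while "P3W2D" is not, because a week designator
// excludes day, month, and year designators; RE2::FullMatch against the
// pattern below accepts the first two strings and rejects the third.)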
static const RE2 iso8601_period_with_weeks(
R"(P(-?[[:digit:]]+W|-?[[:digit:]]+[,.][[:digit:]]+W){1})");
@@ -61,8 +61,8 @@ static const int64_t kMillisInAMinute = 60000;
static const int64_t kMillisInASecond = 1000;
static void return_error_with_cause(ExecutionContext* context, std::string& data,
- int32_t supression_error) {
- if (supression_error != 0) {
+ int32_t suppression_error) {
+ if (suppression_error != 0) {
return;
}
diff --git a/cpp/src/gandiva/interval_holder.h b/cpp/src/gandiva/interval_holder.h
index 38d8e9f86a9bc..1e6a5079ad193 100644
--- a/cpp/src/gandiva/interval_holder.h
+++ b/cpp/src/gandiva/interval_holder.h
@@ -73,7 +73,7 @@ class GANDIVA_EXPORT IntervalHolder : public FunctionHolder {
return Status::OK();
}
- explicit IntervalHolder(int32_t supress_errors) : suppress_errors_(supress_errors) {}
+ explicit IntervalHolder(int32_t suppress_errors) : suppress_errors_(suppress_errors) {}
// If the flag is equals to 0, the errors will not be suppressed, any other value
// will made the errors being suppressed
@@ -104,17 +104,17 @@ class GANDIVA_EXPORT IntervalDaysHolder : public IntervalHolder(supress_errors) {}
+ explicit IntervalDaysHolder(int32_t suppress_errors)
+ : IntervalHolder(suppress_errors) {}
private:
- /// Retrieves the day interval from the number of milliseconds enconded as
+ /// Retrieves the day interval from the number of milliseconds encoded as
/// a string
static int64_t GetIntervalDayFromMillis(ExecutionContext* context,
std::string& number_as_string,
int32_t suppress_errors,
bool* out_valid);
- /// Retrieves the day interval from the number of weeks enconded as
+ /// Retrieves the day interval from the number of weeks encoded as
/// a string.
static int64_t GetIntervalDayFromWeeks(ExecutionContext* context,
std::string& number_as_string,
@@ -141,8 +141,8 @@ class GANDIVA_EXPORT IntervalYearsHolder : public IntervalHolder(supress_errors) {}
+ explicit IntervalYearsHolder(int32_t suppress_errors)
+ : IntervalHolder(suppress_errors) {}
private:
static int32_t GetIntervalYearFromNumber(ExecutionContext* context,
diff --git a/cpp/src/gandiva/llvm_generator.h b/cpp/src/gandiva/llvm_generator.h
index fae6ed48defa5..250ab78fbfe28 100644
--- a/cpp/src/gandiva/llvm_generator.h
+++ b/cpp/src/gandiva/llvm_generator.h
@@ -146,7 +146,7 @@ class GANDIVA_EXPORT LLVMGenerator {
const ValueValidityPairVector& args, bool with_validity,
bool with_context);
- // Generate code to onvoke a function call.
+ // Generate code to invoke a function call.
LValuePtr BuildFunctionCall(const NativeFunction* func, DataTypePtr arrow_return_type,
std::vector* params);
diff --git a/cpp/src/gandiva/precompiled/decimal_ops.cc b/cpp/src/gandiva/precompiled/decimal_ops.cc
index 61cac60624dfa..6ecb9368fbb67 100644
--- a/cpp/src/gandiva/precompiled/decimal_ops.cc
+++ b/cpp/src/gandiva/precompiled/decimal_ops.cc
@@ -401,7 +401,7 @@ BasicDecimal128 Mod(int64_t context, const BasicDecimalScalar128& x,
return 0;
}
- // Adsjust x and y to the same scale (higher one), and then, do a integer mod.
+ // Adjust x and y to the same scale (the higher one), and then do an integer mod.
*overflow = false;
BasicDecimal128 result;
int32_t min_lz = MinLeadingZeros(x, y);
@@ -559,7 +559,7 @@ enum RoundType {
// else if -ve and trailing value is >= half of base, -1.
};
-// Compute the rounding delta for the givven rounding type.
+// Compute the rounding delta for the given rounding type.
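// (A worked half-up example under the enum convention above: rounding 12.345
// from scale 3 to scale 1 drops the digits 45 against a base of 100, and
// 2 * 45 < 100, so the delta is 0 and the result is 12.3; for 12.355 the
// dropped 55 gives 2 * 55 >= 100, so the delta is +1 and the result is 12.4;
// for -12.355 the delta is -1.)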
static int32_t ComputeRoundingDelta(const BasicDecimal128& x, int32_t x_scale,
int32_t out_scale, RoundType type) {
if (type == kRoundTypeTrunc || // no rounding for this type.
diff --git a/cpp/src/gandiva/precompiled/string_ops.cc b/cpp/src/gandiva/precompiled/string_ops.cc
index c255b9a11c084..5aa0eb38eafd7 100644
--- a/cpp/src/gandiva/precompiled/string_ops.cc
+++ b/cpp/src/gandiva/precompiled/string_ops.cc
@@ -1705,7 +1705,7 @@ gdv_int32 levenshtein(int64_t context, const char* in1, int32_t in1_len, const c
return in1_len;
}
- // arr_larger and arr_smaller is one pointer for entrys
+ // arr_larger and arr_smaller are pointers to the entries
const char* arr_larger;
const char* arr_smaller;
// len_larger and len_smaller is one copy from lengths
@@ -1733,7 +1733,7 @@ gdv_int32 levenshtein(int64_t context, const char* in1, int32_t in1_len, const c
return 0;
}
- // MEMORY ADRESS MALLOC
+ // MEMORY ADDRESS MALLOC
// v0 -> (0, ..., &ptr[in2_len])
// v1 -> (in2_len+1, ..., &ptr[in2_len * 2])
int* v0;
@@ -1742,7 +1742,7 @@ gdv_int32 levenshtein(int64_t context, const char* in1, int32_t in1_len, const c
v0 = &ptr[0];
v1 = &ptr[len_smaller + 1];
- // Initializate v0
+ // Initialize v0
for (int i = 0; i <= len_smaller; i++) {
v0[i] = i;
}
@@ -1778,7 +1778,7 @@ gdv_int32 levenshtein(int64_t context, const char* in1, int32_t in1_len, const c
v1[j + 1] = min;
}
- // Swaping v0 and v1
+ // Swapping v0 and v1
aux = v0;
v0 = v1;
v1 = aux;
diff --git a/cpp/src/gandiva/precompiled/string_ops_test.cc b/cpp/src/gandiva/precompiled/string_ops_test.cc
index b84c51b3a6b00..89213592e7ea2 100644
--- a/cpp/src/gandiva/precompiled/string_ops_test.cc
+++ b/cpp/src/gandiva/precompiled/string_ops_test.cc
@@ -2156,67 +2156,67 @@ TEST(TestStringOps, TestEltFunction) {
// gandiva::ExecutionContext ctx;
// int64_t ctx_ptr = reinterpret_cast(&ctx);
gdv_int32 out_len = 0;
- bool out_vality = false;
+ bool out_validity = false;
const char* word1 = "john";
auto word1_len = static_cast(strlen(word1));
const char* word2 = "";
auto word2_len = static_cast(strlen(word2));
auto out_string = elt_int32_utf8_utf8(1, true, word1, word1_len, true, word2, word2_len,
- true, &out_vality, &out_len);
+ true, &out_validity, &out_len);
EXPECT_EQ("john", std::string(out_string, out_len));
- EXPECT_EQ(out_vality, true);
+ EXPECT_EQ(out_validity, true);
word1 = "hello";
word1_len = static_cast(strlen(word1));
word2 = "world";
word2_len = static_cast(strlen(word2));
out_string = elt_int32_utf8_utf8(2, true, word1, word1_len, true, word2, word2_len,
- true, &out_vality, &out_len);
+ true, &out_validity, &out_len);
EXPECT_EQ("world", std::string(out_string, out_len));
- EXPECT_EQ(out_vality, true);
+ EXPECT_EQ(out_validity, true);
word1 = "goodbye";
word1_len = static_cast(strlen(word1));
word2 = "world";
word2_len = static_cast(strlen(word2));
out_string = elt_int32_utf8_utf8(4, true, word1, word1_len, true, word2, word2_len,
- true, &out_vality, &out_len);
+ true, &out_validity, &out_len);
EXPECT_EQ("", std::string(out_string, out_len));
- EXPECT_EQ(out_vality, false);
+ EXPECT_EQ(out_validity, false);
word1 = "hi";
word1_len = static_cast(strlen(word1));
word2 = "yeah";
word2_len = static_cast(strlen(word2));
out_string = elt_int32_utf8_utf8(0, true, word1, word1_len, true, word2, word2_len,
- true, &out_vality, &out_len);
+ true, &out_validity, &out_len);
EXPECT_EQ("", std::string(out_string, out_len));
- EXPECT_EQ(out_vality, false);
+ EXPECT_EQ(out_validity, false);
const char* word3 = "wow";
auto word3_len = static_cast(strlen(word3));
out_string = elt_int32_utf8_utf8_utf8(3, true, word1, word1_len, true, word2, word2_len, true, - word3, word3_len, true, &out_vality, &out_len); + word3, word3_len, true, &out_validity, &out_len); EXPECT_EQ("wow", std::string(out_string, out_len)); - EXPECT_EQ(out_vality, true); + EXPECT_EQ(out_validity, true); const char* word4 = "awesome"; auto word4_len = static_cast(strlen(word4)); out_string = elt_int32_utf8_utf8_utf8_utf8( 4, true, word1, word1_len, true, word2, word2_len, true, word3, word3_len, true, - word4, word4_len, true, &out_vality, &out_len); + word4, word4_len, true, &out_validity, &out_len); EXPECT_EQ("awesome", std::string(out_string, out_len)); - EXPECT_EQ(out_vality, true); + EXPECT_EQ(out_validity, true); const char* word5 = "not-empty"; auto word5_len = static_cast(strlen(word5)); out_string = elt_int32_utf8_utf8_utf8_utf8_utf8( 5, true, word1, word1_len, true, word2, word2_len, true, word3, word3_len, true, - word4, word4_len, true, word5, word5_len, true, &out_vality, &out_len); + word4, word4_len, true, word5, word5_len, true, &out_validity, &out_len); EXPECT_EQ("not-empty", std::string(out_string, out_len)); - EXPECT_EQ(out_vality, true); + EXPECT_EQ(out_validity, true); } TEST(TestStringOps, TestToHex) { diff --git a/cpp/src/generated/Schema_generated.h b/cpp/src/generated/Schema_generated.h index eeeeac68f0a45..12ee81e6743b5 100644 --- a/cpp/src/generated/Schema_generated.h +++ b/cpp/src/generated/Schema_generated.h @@ -1725,7 +1725,7 @@ inline ::flatbuffers::Offset