diff --git a/source/detail/constants.cpp b/source/detail/constants.cpp index 4fe31bb81..9cccf31e8 100644 --- a/source/detail/constants.cpp +++ b/source/detail/constants.cpp @@ -49,6 +49,11 @@ const column_t constants::max_column() return column_t(std::numeric_limits::max()); } +const size_t constants::max_elements_for_reserve() +{ + return 10000; +} + // constants const path constants::package_properties() { diff --git a/source/detail/constants.hpp b/source/detail/constants.hpp index 16acadeac..1e96aeeb1 100644 --- a/source/detail/constants.hpp +++ b/source/detail/constants.hpp @@ -54,6 +54,14 @@ struct XLNT_API constants /// static const column_t max_column(); + /// + /// Returns the maximum number of elements that functions like std::vector::reserve (or other containers) are allowed to allocate. + /// Information like a "count" is often saved in XLSX files and can be used by std::vector::reserve (or other containers) + /// to allocate the memory right away and thus improve performance. However, malicious or broken files + /// might then cause XLNT to allocate extreme amounts of memory. This function sets a limit to protect against such issues. + /// + static const size_t max_elements_for_reserve(); + /// /// Returns the URI of the directory containing package properties. 
/// diff --git a/source/detail/limits.hpp b/source/detail/limits.hpp new file mode 100644 index 000000000..a4c47856d --- /dev/null +++ b/source/detail/limits.hpp @@ -0,0 +1,47 @@ +// Copyright (c) 2014-2021 Thomas Fussell +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE +// +// @license: http://www.opensource.org/licenses/mit-license.php +// @author: see AUTHORS file + +#pragma once + +#include <algorithm> +#include <cstddef> + +#include "constants.hpp" + +namespace xlnt { +namespace detail { + +/// +/// Returns num_elements clipped to the upper limit xlnt::constants::max_elements_for_reserve(). +/// Information like a "count" is often saved in XLSX files and can be used by std::vector::reserve (or other containers) +/// to allocate the memory right away and thus improve performance. However, malicious or broken files +/// might then cause XLNT to allocate extreme amounts of memory. 
This function clips the number of elements +/// to an upper limit to protect against such issues, while still allowing the caller to pre-allocate memory. +/// +inline size_t clip_reserve_elements(size_t num_elements) +{ + return std::min(num_elements, xlnt::constants::max_elements_for_reserve()); +} + +} // namespace detail +} // namespace xlnt diff --git a/source/detail/serialization/xlsx_consumer.cpp b/source/detail/serialization/xlsx_consumer.cpp index dc50106cb..016530385 100644 --- a/source/detail/serialization/xlsx_consumer.cpp +++ b/source/detail/serialization/xlsx_consumer.cpp @@ -46,6 +46,7 @@ #include #include #include +#include namespace { /// string_equal @@ -267,7 +268,7 @@ xlnt::detail::Cell parse_cell(xlnt::row_t row_arg, xml::parser *parser, std::uno case xml::parser::end_attribute: case xml::parser::eof: default: { - throw xlnt::exception("unexcpected XML parsing event"); + throw xlnt::exception("unexpected XML parsing event"); } } // Prevents unhandled exceptions from being triggered. 
@@ -344,7 +345,7 @@ std::pair parse_row(xml::parser *parser, xlnt::detail case xml::parser::end_attribute: case xml::parser::eof: default: { - throw xlnt::exception("unexcpected XML parsing event"); + throw xlnt::exception("unexpected XML parsing event"); } } } @@ -382,7 +383,7 @@ Sheet_Data parse_sheet_data(xml::parser *parser, xlnt::detail::number_serialiser case xml::parser::end_attribute: case xml::parser::eof: default: { - throw xlnt::exception("unexcpected XML parsing event"); + throw xlnt::exception("unexpected XML parsing event"); } } } @@ -2280,10 +2281,12 @@ void xlsx_consumer::read_shared_string_table() expect_end_element(qn("spreadsheetml", "sst")); +#ifdef THROW_ON_INVALID_XML if (has_unique_count && unique_count != target_.shared_strings().size()) { throw invalid_file("sizes don't match"); } +#endif } void xlsx_consumer::read_shared_workbook_revision_headers() @@ -2317,7 +2320,12 @@ void xlsx_consumer::read_stylesheet() if (current_style_element == qn("spreadsheetml", "borders")) { auto &borders = stylesheet.borders; - auto count = parser().attribute("count"); + optional count; + if (parser().attribute_present("count")) + { + count = parser().attribute("count"); + borders.reserve(xlnt::detail::clip_reserve_elements(count.get())); + } while (in_element(qn("spreadsheetml", "borders"))) { @@ -2370,15 +2378,22 @@ void xlsx_consumer::read_stylesheet() expect_end_element(qn("spreadsheetml", "border")); } - if (count != borders.size()) +#ifdef THROW_ON_INVALID_XML + if (count.is_set() && count != borders.size()) { throw xlnt::exception("border counts don't match"); } +#endif } else if (current_style_element == qn("spreadsheetml", "fills")) { auto &fills = stylesheet.fills; - auto count = parser().attribute("count"); + optional count; + if (parser().attribute_present("count")) + { + count = parser().attribute("count"); + fills.reserve(xlnt::detail::clip_reserve_elements(count.get())); + } while (in_element(qn("spreadsheetml", "fills"))) { @@ -2455,15 +2470,22 
@@ void xlsx_consumer::read_stylesheet() expect_end_element(qn("spreadsheetml", "fill")); } - if (count != fills.size()) +#ifdef THROW_ON_INVALID_XML + if (count.is_set() && count != fills.size()) { throw xlnt::exception("counts don't match"); } +#endif } else if (current_style_element == qn("spreadsheetml", "fonts")) { auto &fonts = stylesheet.fonts; - auto count = parser().attribute("count", 0); + optional count; + if (parser().attribute_present("count")) + { + count = parser().attribute("count"); + fonts.reserve(xlnt::detail::clip_reserve_elements(count.get())); + } if (parser().attribute_present(qn("x14ac", "knownFonts"))) { @@ -2598,15 +2620,22 @@ void xlsx_consumer::read_stylesheet() expect_end_element(qn("spreadsheetml", "font")); } - if (count != stylesheet.fonts.size()) +#ifdef THROW_ON_INVALID_XML + if (count.is_set() && count != stylesheet.fonts.size()) { - // throw xlnt::exception("counts don't match"); + throw xlnt::exception("counts don't match"); } +#endif } else if (current_style_element == qn("spreadsheetml", "numFmts")) { auto &number_formats = stylesheet.number_formats; - auto count = parser().attribute("count"); + optional count; + if (parser().attribute_present("count")) + { + count = parser().attribute("count"); + number_formats.reserve(xlnt::detail::clip_reserve_elements(count.get())); + } while (in_element(qn("spreadsheetml", "numFmts"))) { @@ -2629,14 +2658,21 @@ void xlsx_consumer::read_stylesheet() number_formats.push_back(nf); } - if (count != number_formats.size()) +#ifdef THROW_ON_INVALID_XML + if (count.is_set() && count != number_formats.size()) { throw xlnt::exception("counts don't match"); } +#endif } else if (current_style_element == qn("spreadsheetml", "cellStyles")) { - auto count = parser().attribute("count"); + optional count; + if (parser().attribute_present("count")) + { + count = parser().attribute("count"); + styles.reserve(xlnt::detail::clip_reserve_elements(count.get())); + } while (in_element(qn("spreadsheetml", 
"cellStyles"))) { @@ -2665,16 +2701,30 @@ void xlsx_consumer::read_stylesheet() expect_end_element(qn("spreadsheetml", "cellStyle")); } - if (count != styles.size()) +#ifdef THROW_ON_INVALID_XML + if (count.is_set() && count != styles.size()) { throw xlnt::exception("counts don't match"); } +#endif } else if (current_style_element == qn("spreadsheetml", "cellStyleXfs") || current_style_element == qn("spreadsheetml", "cellXfs")) { auto in_style_records = current_style_element.name() == "cellStyleXfs"; - auto count = parser().attribute("count"); + optional count; + if (parser().attribute_present("count")) + { + count = parser().attribute("count"); + if (in_style_records) + { + style_records.reserve(xlnt::detail::clip_reserve_elements(count.get())); + } + else + { + format_records.reserve(xlnt::detail::clip_reserve_elements(count.get())); + } + } while (in_element(current_style_element)) { @@ -2803,15 +2853,16 @@ void xlsx_consumer::read_stylesheet() expect_end_element(qn("spreadsheetml", "xf")); } - if ((in_style_records && count != style_records.size()) - || (!in_style_records && count != format_records.size())) +#ifdef THROW_ON_INVALID_XML + if (count.is_set() && ((in_style_records && count != style_records.size()) + || (!in_style_records && count != format_records.size()))) { throw xlnt::exception("counts don't match"); } +#endif } else if (current_style_element == qn("spreadsheetml", "dxfs")) { - auto count = parser().attribute("count"); std::size_t processed = 0; while (in_element(current_style_element)) @@ -2822,17 +2873,22 @@ void xlsx_consumer::read_stylesheet() ++processed; } - if (count != processed) +#ifdef THROW_ON_INVALID_XML + if (parser().attribute_present("count")) { - throw xlnt::exception("counts don't match"); + std::size_t count = parser().attribute("count"); + if (count != processed) + { + throw xlnt::exception("counts don't match"); + } } +#endif } else if (current_style_element == qn("spreadsheetml", "tableStyles")) { 
skip_attribute("defaultTableStyle"); skip_attribute("defaultPivotStyle"); - auto count = parser().attribute("count"); std::size_t processed = 0; while (in_element(qn("spreadsheetml", "tableStyles"))) @@ -2843,10 +2899,16 @@ void xlsx_consumer::read_stylesheet() ++processed; } - if (count != processed) +#ifdef THROW_ON_INVALID_XML + if (parser().attribute_present("count")) { - throw xlnt::exception("counts don't match"); + std::size_t count = parser().attribute("count"); + if (count != processed) + { + throw xlnt::exception("counts don't match"); + } } +#endif } else if (current_style_element == qn("spreadsheetml", "extLst")) { diff --git a/tests/data/Issue735_wrong_count.xlsx b/tests/data/Issue735_wrong_count.xlsx new file mode 100644 index 000000000..62a7beeee Binary files /dev/null and b/tests/data/Issue735_wrong_count.xlsx differ diff --git a/tests/workbook/serialization_test_suite.cpp b/tests/workbook/serialization_test_suite.cpp index ca6383a18..473da18c4 100644 --- a/tests/workbook/serialization_test_suite.cpp +++ b/tests/workbook/serialization_test_suite.cpp @@ -71,6 +71,7 @@ class serialization_test_suite : public test_suite register_test(test_Issue445_inline_str_streaming_read); register_test(test_Issue492_stream_empty_row); register_test(test_Issue503_external_link_load); + register_test(test_Issue735_wrong_count); register_test(test_formatting); register_test(test_active_sheet); } @@ -762,6 +763,13 @@ class serialization_test_suite : public test_suite auto cell = ws.cell("A1"); xlnt_assert_equals(cell.value(), std::string("WDG_IC_00000003.aut")); } + + void test_Issue735_wrong_count() + { + xlnt::workbook wb; + wb.load(path_helper::test_file("Issue735_wrong_count.xlsx")); + xlnt_assert_throws_nothing(wb.active_sheet()); + } void test_formatting() {