From 7e2e3d3a17bff1dbbc1f00d7a2ff85d4c68be13c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lauren=C8=9Biu=20Leahu-Vl=C4=83ducu?= Date: Wed, 22 May 2024 09:24:44 +0200 Subject: [PATCH 1/5] Fixes issue #735: https://github.com/tfussell/xlnt/issues/735 --- source/detail/serialization/xlsx_consumer.cpp | 103 ++++++++++++++---- 1 file changed, 82 insertions(+), 21 deletions(-) diff --git a/source/detail/serialization/xlsx_consumer.cpp b/source/detail/serialization/xlsx_consumer.cpp index dc50106cb..0b24a76d5 100644 --- a/source/detail/serialization/xlsx_consumer.cpp +++ b/source/detail/serialization/xlsx_consumer.cpp @@ -267,7 +267,7 @@ xlnt::detail::Cell parse_cell(xlnt::row_t row_arg, xml::parser *parser, std::uno case xml::parser::end_attribute: case xml::parser::eof: default: { - throw xlnt::exception("unexcpected XML parsing event"); + throw xlnt::exception("unexpected XML parsing event"); } } // Prevents unhandled exceptions from being triggered. @@ -344,7 +344,7 @@ std::pair parse_row(xml::parser *parser, xlnt::detail case xml::parser::end_attribute: case xml::parser::eof: default: { - throw xlnt::exception("unexcpected XML parsing event"); + throw xlnt::exception("unexpected XML parsing event"); } } } @@ -382,7 +382,7 @@ Sheet_Data parse_sheet_data(xml::parser *parser, xlnt::detail::number_serialiser case xml::parser::end_attribute: case xml::parser::eof: default: { - throw xlnt::exception("unexcpected XML parsing event"); + throw xlnt::exception("unexpected XML parsing event"); } } } @@ -2317,7 +2317,12 @@ void xlsx_consumer::read_stylesheet() if (current_style_element == qn("spreadsheetml", "borders")) { auto &borders = stylesheet.borders; - auto count = parser().attribute("count"); + optional count; + if (parser().attribute_present("count")) + { + count = parser().attribute("count"); + borders.reserve(count.get()); + } while (in_element(qn("spreadsheetml", "borders"))) { @@ -2370,15 +2375,22 @@ void xlsx_consumer::read_stylesheet() expect_end_element(qn("spreadsheetml", "border")); } - if (count != borders.size()) +#ifdef THROW_ON_INVALID_XML + if (count.is_set() && count != borders.size()) { throw xlnt::exception("border counts don't match"); } +#endif } else if (current_style_element == qn("spreadsheetml", "fills")) { auto &fills = stylesheet.fills; - auto count = parser().attribute("count"); + optional count; + if (parser().attribute_present("count")) + { + count = parser().attribute("count"); + fills.reserve(count.get()); + } while (in_element(qn("spreadsheetml", "fills"))) { @@ -2455,15 +2467,22 @@ void xlsx_consumer::read_stylesheet() expect_end_element(qn("spreadsheetml", "fill")); } - if (count != fills.size()) +#ifdef THROW_ON_INVALID_XML + if (count.is_set() && count != fills.size()) { throw xlnt::exception("counts don't match"); } +#endif } else if (current_style_element == qn("spreadsheetml", "fonts")) { auto &fonts = stylesheet.fonts; - auto count = parser().attribute("count", 0); + optional count; + if (parser().attribute_present("count")) + { + count = parser().attribute("count"); + fonts.reserve(count.get()); + } if (parser().attribute_present(qn("x14ac", "knownFonts"))) { @@ -2598,15 +2617,22 @@ void xlsx_consumer::read_stylesheet() expect_end_element(qn("spreadsheetml", "font")); } - if (count != stylesheet.fonts.size()) +#ifdef THROW_ON_INVALID_XML + if (count.is_set() && count != stylesheet.fonts.size()) { - // throw xlnt::exception("counts don't match"); + throw xlnt::exception("counts don't match"); } +#endif } else if (current_style_element == qn("spreadsheetml", "numFmts")) { auto &number_formats = stylesheet.number_formats; - auto count = parser().attribute("count"); + optional count; + if (parser().attribute_present("count")) + { + count = parser().attribute("count"); + number_formats.reserve(count.get()); + } while (in_element(qn("spreadsheetml", "numFmts"))) { @@ -2629,14 +2655,21 @@ void xlsx_consumer::read_stylesheet() number_formats.push_back(nf); } - if (count != number_formats.size()) +#ifdef THROW_ON_INVALID_XML + if (count.is_set() && count != number_formats.size()) { throw xlnt::exception("counts don't match"); } +#endif } else if (current_style_element == qn("spreadsheetml", "cellStyles")) { - auto count = parser().attribute("count"); + optional count; + if (parser().attribute_present("count")) + { + count = parser().attribute("count"); + styles.reserve(count.get()); + } while (in_element(qn("spreadsheetml", "cellStyles"))) { @@ -2665,16 +2698,30 @@ void xlsx_consumer::read_stylesheet() expect_end_element(qn("spreadsheetml", "cellStyle")); } - if (count != styles.size()) +#ifdef THROW_ON_INVALID_XML + if (count.is_set() && count != styles.size()) { throw xlnt::exception("counts don't match"); } +#endif } else if (current_style_element == qn("spreadsheetml", "cellStyleXfs") || current_style_element == qn("spreadsheetml", "cellXfs")) { auto in_style_records = current_style_element.name() == "cellStyleXfs"; - auto count = parser().attribute("count"); + optional count; + if (parser().attribute_present("count")) + { + count = parser().attribute("count"); + if (in_style_records) + { + style_records.reserve(count.get()); + } + else + { + format_records.reserve(count.get()); + } + } while (in_element(current_style_element)) { @@ -2803,15 +2850,21 @@ void xlsx_consumer::read_stylesheet() expect_end_element(qn("spreadsheetml", "xf")); } - if ((in_style_records && count != style_records.size()) - || (!in_style_records && count != format_records.size())) +#ifdef THROW_ON_INVALID_XML + if (count.is_set() && ((in_style_records && count != style_records.size()) + || (!in_style_records && count != format_records.size()))) { throw xlnt::exception("counts don't match"); } +#endif } else if (current_style_element == qn("spreadsheetml", "dxfs")) { - auto count = parser().attribute("count"); + optional count; + if (parser().attribute_present("count")) + { + count = parser().attribute("count"); + } std::size_t processed = 0; while (in_element(current_style_element)) @@ -2822,17 +2875,23 @@ void xlsx_consumer::read_stylesheet() ++processed; } - if (count != processed) +#ifdef THROW_ON_INVALID_XML + if (count.is_set() && count != processed) { throw xlnt::exception("counts don't match"); } +#endif } else if (current_style_element == qn("spreadsheetml", "tableStyles")) { skip_attribute("defaultTableStyle"); skip_attribute("defaultPivotStyle"); - auto count = parser().attribute("count"); + optional count; + if (parser().attribute_present("count")) + { + count = parser().attribute("count"); + } std::size_t processed = 0; while (in_element(qn("spreadsheetml", "tableStyles"))) @@ -2843,10 +2902,12 @@ void xlsx_consumer::read_stylesheet() ++processed; } - if (count != processed) +#ifdef THROW_ON_INVALID_XML + if (count.is_set() && count != processed) { throw xlnt::exception("counts don't match"); } +#endif } else if (current_style_element == qn("spreadsheetml", "extLst")) { From f081ca49ab7974b9cf6afa4db345d8833e39d595 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lauren=C8=9Biu=20Leahu-Vl=C4=83ducu?= Date: Thu, 15 Aug 2024 22:42:59 +0200 Subject: [PATCH 2/5] Added a limit for memory allocation when XLSX files contain a "count" attribute. Added a unit tests with multiple wrong "count" values. Fixed a related exception when loading the shared string table with the wrong "uniqueCount". --- source/detail/constants.cpp | 5 +++ source/detail/constants.hpp | 8 ++++ source/detail/serialization/xlsx_consumer.cpp | 42 +++++++++++++----- tests/data/Issue735_wrong_count.xlsx | Bin 0 -> 7006 bytes tests/workbook/serialization_test_suite.cpp | 8 ++++ 5 files changed, 52 insertions(+), 11 deletions(-) create mode 100644 tests/data/Issue735_wrong_count.xlsx diff --git a/source/detail/constants.cpp b/source/detail/constants.cpp index 4fe31bb81..3cb8423e7 100644 --- a/source/detail/constants.cpp +++ b/source/detail/constants.cpp @@ -49,6 +49,11 @@ const column_t constants::max_column() return column_t(std::numeric_limits::max()); } +const size_t constants::max_elements_for_reserve() +{ + return 10'000; +} + // constants const path constants::package_properties() { diff --git a/source/detail/constants.hpp b/source/detail/constants.hpp index 16acadeac..42558a912 100644 --- a/source/detail/constants.hpp +++ b/source/detail/constants.hpp @@ -54,6 +54,14 @@ struct XLNT_API constants /// static const column_t max_column(); + /// + /// Returns the maximum amount of elements that functions like std::vector::reserve are allowed to allocate. + /// Information like a "count" is often saved in XLSX files and can be used by std::vector::reserve to + /// allocate the memory right away and thus improve performance. However, malicious or broken files + /// might then cause XLNT to allocate extreme amounts of memory. This function sets a limit to protect against such issues. + /// + static const size_t max_elements_for_reserve(); + /// /// Returns the URI of the directory containing package properties. /// diff --git a/source/detail/serialization/xlsx_consumer.cpp b/source/detail/serialization/xlsx_consumer.cpp index 0b24a76d5..0a3404e6c 100644 --- a/source/detail/serialization/xlsx_consumer.cpp +++ b/source/detail/serialization/xlsx_consumer.cpp @@ -2280,10 +2280,12 @@ void xlsx_consumer::read_shared_string_table() expect_end_element(qn("spreadsheetml", "sst")); +#ifdef THROW_ON_INVALID_XML if (has_unique_count && unique_count != target_.shared_strings().size()) { throw invalid_file("sizes don't match"); } +#endif } void xlsx_consumer::read_shared_workbook_revision_headers() @@ -2321,7 +2323,10 @@ void xlsx_consumer::read_stylesheet() if (parser().attribute_present("count")) { count = parser().attribute("count"); - borders.reserve(count.get()); + if (count.get() <= xlnt::constants::max_elements_for_reserve()) + { + borders.reserve(count.get()); + } } while (in_element(qn("spreadsheetml", "borders"))) @@ -2389,7 +2394,10 @@ void xlsx_consumer::read_stylesheet() if (parser().attribute_present("count")) { count = parser().attribute("count"); - fills.reserve(count.get()); + if (count.get() <= xlnt::constants::max_elements_for_reserve()) + { + fills.reserve(count.get()); + } } while (in_element(qn("spreadsheetml", "fills"))) @@ -2481,7 +2489,10 @@ void xlsx_consumer::read_stylesheet() if (parser().attribute_present("count")) { count = parser().attribute("count"); - fonts.reserve(count.get()); + if (count.get() <= xlnt::constants::max_elements_for_reserve()) + { + fonts.reserve(count.get()); + } } if (parser().attribute_present(qn("x14ac", "knownFonts"))) @@ -2631,7 +2642,10 @@ void xlsx_consumer::read_stylesheet() if (parser().attribute_present("count")) { count = parser().attribute("count"); - number_formats.reserve(count.get()); + if (count.get() <= xlnt::constants::max_elements_for_reserve()) + { + number_formats.reserve(count.get()); + } } while (in_element(qn("spreadsheetml", "numFmts"))) @@ -2668,7 +2682,10 @@ void xlsx_consumer::read_stylesheet() if (parser().attribute_present("count")) { count = parser().attribute("count"); - styles.reserve(count.get()); + if (count.get() <= xlnt::constants::max_elements_for_reserve()) + { + styles.reserve(count.get()); + } } while (in_element(qn("spreadsheetml", "cellStyles"))) @@ -2713,13 +2730,16 @@ void xlsx_consumer::read_stylesheet() if (parser().attribute_present("count")) { count = parser().attribute("count"); - if (in_style_records) - { - style_records.reserve(count.get()); - } - else + if (count.get() <= xlnt::constants::max_elements_for_reserve()) { - format_records.reserve(count.get()); + if (in_style_records) + { + style_records.reserve(count.get()); + } + else + { + format_records.reserve(count.get()); + } } } diff --git a/tests/data/Issue735_wrong_count.xlsx b/tests/data/Issue735_wrong_count.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..62a7beeeefa8e285c46c376296acd8d86cce5cbb GIT binary patch literal 7006 zcmb7I1yodP*B-j0Taa>)p-UK$5~KtvNr!Hbo}nA0JEaw*8zn>#5CIX85ov}-kOm1U z0sq0e_;J^N?>%dsnKN_Vcki>Go##&S$h0q+N>`pA; zBSV{Agz&hU%33_SPu_%Y9R^3Zp=nY<(iCy#c(Jf4l8!VYE6CDyW!U(lGU?qy)_d`Y@hKz` zH6&!TG7E*J!$aL7mps+(S&eou*I+bR52Bh;O+<<@0vTT1!xLT02u$_&E3 zkvjLF{@>E`m3L8`Je0kSW-Oyh11f0`xeOXU8w7FPZ_yq~7t{G{`kbd(+1ptW8`_0q=YJTRp-h(%HG77i{KyoWC3v4oIm8dyr+F1buDY%jM)3Z(?4`m+m!s6&mSu$B(7x1|?>7C%E8%i-rrcIZ2XkiI^uN%)m`un-0l3?q zOQStxw)IgFn`Izxv>?y397HaFSs6pk&D5Qt*7laLGmpr`wBjN?BE1>-jcCtaojpuG zfeyXp<`ylY7XNxi`eeju6EPv(V(%sX0-uiCV!SYUGh^MM?6`(OtjipF+>5;^WRZna zD&c*vX{aI34X7>D@)d23$G3e!%u2Yktkj+j}wp?I|^Mx3aK zixX)6d@KM05~UD53FoKHBzEfw4>26Ha+39X%7rz7^~+jpfN(kCy`@;mLvLhUkzoP= z_&<$t_OLboZbi9>6Qr2E{P2ES8z%nNhQ=i6VT zz0j+J2D)F9_Y%lGhiIC*S(TFzB}LID0Vm@3z;F~I%A_5v{UP6CUtY?Yf z19($wd|Y%prPX5jz2)^i?JbMd&Yg^2*NnLU|)kLc_vr^kPz;bvR_sIa`_AnV(C|vkvDXo@PCU z0RTi@1^`IUFZ;fj_bfBl@9E+pW7?PUm@t2{df%0_(020ENuaiOlzyG;fCVt^hBT4S zwhWPmH?jAQq_m{uRtNo6kb!Sd;kw?qz)na+5shtEmz#2PW@09AJw~6Bv?QxjkLaPR zi%mwcjZpMxYTiJF@7zJr(69ib+6~#H{Q6Svsy?^%i3W2l1ea;15&o|F(whN~WPO6w_S{$}bCx7A*{Qa9y zGT*zbe3H_KPeG(w0=R1jnaL;N!Pu-L@T-WSoX5P4j{KscW`>P>dK6iS69hX20E29xSCH6Ake_!Z^dN4f;4h4eITT-N(B&p=aE6am@YuZ@i zV(V+>eLZH&-o zD1=|7a0Pz&GD0Mx&qH&17UCbeLw(01P@|7(#ELG5$nJUv>qE%!8@O?qs)!PHadeDy zCXQe{!urVpe-3SRQ4PWKpGiqw>r^M)-o%zgdekHllYl-In*TmHfcoUWJz zqRtJ{ipPqJ(genGx@*Ad<7hoQldRi#DWvjYsoeLS_iu%$g`P0z16_bA^LnIdTc%|Z zv{f+UsD!ix{@WkfTq}jJ)XA*nQZ*8chRE*@G7^9$sWuYPdp324Ss2iYS>%WL7E71| z?)ivuEuqg&)IWXLPnzH>&{Apcy18$*XZUcy&sT8MDR2oq|Cx5*&1?fDy-?2a@;8-< zZ>0MYY4tm`$GZJ4X*tnSPoYp1LAnW+png-Rk5HZdMISrCHZE-S25tjEP>lXxFi8Pc0i6D%cn~qotkp)r#x(G0bdGROE463X8bg(rNI ziOa}Co%b|I{+)mVN$RRO6`vL}xQF1KM2cSL@s6lv85Xt5HK7C=pgB2$?`rMvV@x=k zEkRyv$45K_^^HY2VxexlvUdTdOlBLsPhM*$Hm-EkI-9Tb%2_Os$T(BvF2p=j_{8q6 z#kFi!n_vyo2{0(I`i?FJ3nA8Y;oo+LE2lEl31ydsDns9Ugfbd5raC3m#m^e7toOOY6jUM}P9-^0& zOAtgp|qt-qQ*x9yH zu*L4t9gg=oMtjOB*^Vp&1x5tw)%&*A{`rDfj={T9)AXJgq!D%4Pd)HL;C!vd#U7ge zP|h%vF53R*FF^zn$cRi+P1F1l$)q zBdeSBLL}%&Zd=x2#!IqK;gJ$Q<7|FyR@Jj(c*GIK?QpZ<^1HJPN=roQK0)6o?r6f< z8F_J@fOd<)jWu&0;>h^4ZaX%_2n$cP&n{kn_v zxGV@|d^AKvh>xy1%g2n1B{?2PzkS>?^03PS&xUdy*Fp?A#kxR~Nd(BH2>mpZL{6pFg*nDW~PZTR*D9 z#G=K@qX!1p9|biz4H9}qT2WVE<__`}h%^pkgz@ zL)5Vu?2DD-rR9pKiGF$oRndwdIKj{D9^JSJXLOu}TlKWKDKibKA_EqUnDwW{ec6i2 zN0S_`M2RxfZ=ssraX<#i@hCO$+EfqcTA`@gw;fc3F6jIo1Y3YZ)dd+b)G61f2 zc8V@n>2{)WjM+nk4cAP0yXOc+E#j#_8)5FjNx+p|>~GGrtks zldkeX8%5XX*xF@I;!JMlf#b2ZPHpObkBt`{{d2lBgqqv_ua5)7lTucYzHLDI7&%~` z4WU1lov$ohXB*2CdhfUK5Cv@pH$NEl;uPV`PkQ?zNvgu`2p|Q~y;cXT8K-aYk&*Jf z#5g~bm`W_N)4Bou0^Xg&B6VqHFNz>R_awDl)WtB&*bk*7YDef!V#VUh0(FM=-FgVu z1Qr>g`oMT~QC7!yTbc@d1XxNkVM{wIr2W~#J%imvy;{#fl;(#V10I-ziFcmX;r1iM z14pBX9l8jV2B^L9K6%}IPss+{8ojz8odq$&@Y0{BjZ0ajgbl+Ub*O(;-soMqd`p0( z%|Tul8ssj;jfJOQFTo=vaD2*SrCV; zx@YsZ?kxvMnbIFdW0%ve5qjl%!d`>f`!qIOF zmzTN4MRI1RDpyp#*|7uM(OKuABv-3XA&JDjADl)wQ-AkJhkb6OK2DNcu@>OLPdnB9 zS_D;&bzQXG>=-*Aa!=ur;PDG3lGZ2kbO>1sUP!Q!Qcl<^bbS|vd6JY*OHfQF%^%I4 zXd4v3&I0h7WdD|Kbsfl0zAQ5|!3@^&J*q$c?3?DK+mh7Gp@f2vg4c?D{5)9Q)+yh9FaW@+BVY@ zeZ{?*D6At9DbApx8g$CoEN51zd~5zb5bfL5XD)s-trf;#pUb<%b+#LcU6j8Za50jR zZ}fn_3*%Z%o~x_Kdv}mwjQ4xZ_qdHt=v6@Hz2v_cxTy-A?pniTdY07`Rt1j#f?|?! zXl#+*ua)KF70Ga;N&;q^=zU1?Y*OiCwzzP+a%PB&TA+z#yKBXV&#vwUV*U}~nfa2m zHbT%KeHX84pM`+CToHDQx!q)x0yV6Ma_v?SI#!Cgu?2ZfQhP6-(), std::string("WDG_IC_00000003.aut")); } + + void test_Issue735_wrong_count() + { + xlnt::workbook wb; + wb.load(path_helper::test_file("Issue735_wrong_count.xlsx")); + xlnt_assert_throws_nothing(wb.active_sheet()); + } void test_formatting() { From 63b2f23484af6429e428643d6896745f543bff99 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lauren=C8=9Biu=20Leahu-Vl=C4=83ducu?= Date: Thu, 15 Aug 2024 22:46:06 +0200 Subject: [PATCH 3/5] Removed digit separator from constant as it does not compile under C++11. --- source/detail/constants.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/detail/constants.cpp b/source/detail/constants.cpp index 3cb8423e7..9cccf31e8 100644 --- a/source/detail/constants.cpp +++ b/source/detail/constants.cpp @@ -51,7 +51,7 @@ const column_t constants::max_column() const size_t constants::max_elements_for_reserve() { - return 10'000; + return 10000; } // constants From cb964436b3a9d9aa43e55c5476ba407f4a816b6b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lauren=C8=9Biu=20Leahu-Vl=C4=83ducu?= Date: Fri, 16 Aug 2024 06:15:22 +0200 Subject: [PATCH 4/5] The "count" is now only parsed if necessary (that is, in certain cases only if THROW_ON_INVALID_XML is defined). --- source/detail/serialization/xlsx_consumer.cpp | 26 +++++++++---------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/source/detail/serialization/xlsx_consumer.cpp b/source/detail/serialization/xlsx_consumer.cpp index 0a3404e6c..581a54063 100644 --- a/source/detail/serialization/xlsx_consumer.cpp +++ b/source/detail/serialization/xlsx_consumer.cpp @@ -2880,11 +2880,6 @@ void xlsx_consumer::read_stylesheet() } else if (current_style_element == qn("spreadsheetml", "dxfs")) { - optional count; - if (parser().attribute_present("count")) - { - count = parser().attribute("count"); - } std::size_t processed = 0; while (in_element(current_style_element)) @@ -2896,9 +2891,13 @@ void xlsx_consumer::read_stylesheet() } #ifdef THROW_ON_INVALID_XML - if (count.is_set() && count != processed) + if (parser().attribute_present("count")) { - throw xlnt::exception("counts don't match"); + std::size_t count = parser().attribute("count"); + if (count != processed) + { + throw xlnt::exception("counts don't match"); + } } #endif } @@ -2907,11 +2906,6 @@ void xlsx_consumer::read_stylesheet() skip_attribute("defaultTableStyle"); skip_attribute("defaultPivotStyle"); - optional count; - if (parser().attribute_present("count")) - { - count = parser().attribute("count"); - } std::size_t processed = 0; while (in_element(qn("spreadsheetml", "tableStyles"))) @@ -2923,9 +2917,13 @@ void xlsx_consumer::read_stylesheet() } #ifdef THROW_ON_INVALID_XML - if (count.is_set() && count != processed) + if (parser().attribute_present("count")) { - throw xlnt::exception("counts don't match"); + std::size_t count = parser().attribute("count"); + if (count != processed) + { + throw xlnt::exception("counts don't match"); + } } #endif } From 053a6e15b0d9aa90eff75daf0fe24e04da4156fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lauren=C8=9Biu=20Leahu-Vl=C4=83ducu?= Date: Fri, 16 Aug 2024 12:16:15 +0200 Subject: [PATCH 5/5] Added a utility function for clipping the number of elements for functions like std::vector::reserve (or for other containers), creating simpler code. This now also ensures that memory is pre-allocated even if XML attributes like "count" specify an extreme number of elements (in which case no memory was previously allocated at all anymore). --- source/detail/constants.hpp | 6 +-- source/detail/limits.hpp | 44 +++++++++++++++++++ source/detail/serialization/xlsx_consumer.cpp | 41 +++++------------ 3 files changed, 59 insertions(+), 32 deletions(-) create mode 100644 source/detail/limits.hpp diff --git a/source/detail/constants.hpp b/source/detail/constants.hpp index 42558a912..1e96aeeb1 100644 --- a/source/detail/constants.hpp +++ b/source/detail/constants.hpp @@ -55,9 +55,9 @@ struct XLNT_API constants static const column_t max_column(); /// - /// Returns the maximum amount of elements that functions like std::vector::reserve are allowed to allocate. - /// Information like a "count" is often saved in XLSX files and can be used by std::vector::reserve to - /// allocate the memory right away and thus improve performance. However, malicious or broken files + /// Returns the maximum amount of elements that functions like std::vector::reserve (or other containers) are allowed to allocate. + /// Information like a "count" is often saved in XLSX files and can be used by std::vector::reserve (or other containers) + /// to allocate the memory right away and thus improve performance. However, malicious or broken files /// might then cause XLNT to allocate extreme amounts of memory. This function sets a limit to protect against such issues. /// static const size_t max_elements_for_reserve(); diff --git a/source/detail/limits.hpp b/source/detail/limits.hpp new file mode 100644 index 000000000..a4c47856d --- /dev/null +++ b/source/detail/limits.hpp @@ -0,0 +1,44 @@ +// Copyright (c) 2014-2021 Thomas Fussell +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE +// +// @license: http://www.opensource.org/licenses/mit-license.php +// @author: see AUTHORS file + +#pragma once + +#include "constants.hpp" + +namespace xlnt { +namespace detail { + +/// +/// Clips the maximum number of reserved elements to a certain upper limit. +/// Information like a "count" is often saved in XLSX files and can be used by std::vector::reserve (or other containers) +/// to allocate the memory right away and thus improve performance. However, malicious or broken files +/// might then cause XLNT to allocate extreme amounts of memory. This function clips the number of elements +/// to an upper limit to protect against such issues, but still allow the caller to pre-allocate memory. +/// +inline size_t clip_reserve_elements(size_t num_elements) +{ + return std::min(num_elements, xlnt::constants::max_elements_for_reserve()); +} + +} // namespace detail +} // namespace xlnt diff --git a/source/detail/serialization/xlsx_consumer.cpp b/source/detail/serialization/xlsx_consumer.cpp index 581a54063..016530385 100644 --- a/source/detail/serialization/xlsx_consumer.cpp +++ b/source/detail/serialization/xlsx_consumer.cpp @@ -46,6 +46,7 @@ #include #include #include +#include namespace { /// string_equal @@ -2323,10 +2324,7 @@ void xlsx_consumer::read_stylesheet() if (parser().attribute_present("count")) { count = parser().attribute("count"); - if (count.get() <= xlnt::constants::max_elements_for_reserve()) - { - borders.reserve(count.get()); - } + borders.reserve(xlnt::detail::clip_reserve_elements(count.get())); } while (in_element(qn("spreadsheetml", "borders"))) @@ -2394,10 +2392,7 @@ void xlsx_consumer::read_stylesheet() if (parser().attribute_present("count")) { count = parser().attribute("count"); - if (count.get() <= xlnt::constants::max_elements_for_reserve()) - { - fills.reserve(count.get()); - } + fills.reserve(xlnt::detail::clip_reserve_elements(count.get())); } while (in_element(qn("spreadsheetml", "fills"))) @@ -2489,10 +2484,7 @@ void xlsx_consumer::read_stylesheet() if (parser().attribute_present("count")) { count = parser().attribute("count"); - if (count.get() <= xlnt::constants::max_elements_for_reserve()) - { - fonts.reserve(count.get()); - } + fonts.reserve(xlnt::detail::clip_reserve_elements(count.get())); } if (parser().attribute_present(qn("x14ac", "knownFonts"))) @@ -2642,10 +2634,7 @@ void xlsx_consumer::read_stylesheet() if (parser().attribute_present("count")) { count = parser().attribute("count"); - if (count.get() <= xlnt::constants::max_elements_for_reserve()) - { - number_formats.reserve(count.get()); - } + number_formats.reserve(xlnt::detail::clip_reserve_elements(count.get())); } while (in_element(qn("spreadsheetml", "numFmts"))) @@ -2682,10 +2671,7 @@ void xlsx_consumer::read_stylesheet() if (parser().attribute_present("count")) { count = parser().attribute("count"); - if (count.get() <= xlnt::constants::max_elements_for_reserve()) - { - styles.reserve(count.get()); - } + styles.reserve(xlnt::detail::clip_reserve_elements(count.get())); } while (in_element(qn("spreadsheetml", "cellStyles"))) @@ -2730,16 +2716,13 @@ void xlsx_consumer::read_stylesheet() if (parser().attribute_present("count")) { count = parser().attribute("count"); - if (count.get() <= xlnt::constants::max_elements_for_reserve()) + if (in_style_records) { - if (in_style_records) - { - style_records.reserve(count.get()); - } - else - { - format_records.reserve(count.get()); - } + style_records.reserve(xlnt::detail::clip_reserve_elements(count.get())); + } + else + { + format_records.reserve(xlnt::detail::clip_reserve_elements(count.get())); } }