diff --git a/third_party/libxml/BUILD.gn b/third_party/libxml/BUILD.gn index b725e09b0e56..719953224ff0 100644 --- a/third_party/libxml/BUILD.gn +++ b/third_party/libxml/BUILD.gn @@ -4,11 +4,12 @@ # Define an "os_include" variable that points at the OS-specific generated # headers. These were generated by running the configure script offline. -if (is_starboard) { +if (use_cobalt_customizations) { os_include = "starboard" -} else if (is_linux || is_android || is_nacl || is_fuchsia) { +} else +if (is_linux || is_chromeos || is_android || is_nacl || is_fuchsia) { os_include = "linux" -} else if (is_mac || is_ios) { +} else if (is_apple) { os_include = "mac" } else if (is_win) { os_include = "win32" @@ -74,7 +75,7 @@ config("libxml_warnings") { "-Wno-unused-but-set-variable", ] } - } else if (is_linux) { + } else if (is_linux || is_chromeos) { cflags = [ # gcc spits out a bunch of warnings about passing too many arguments to # __xmlSimpleError. @@ -83,44 +84,44 @@ config("libxml_warnings") { } } -if (!is_starboard) { - static_library("xml_reader") { - # Do not expand this visibility list without first consulting with the - # Security Team. - visibility = [ - "//base/test:test_support", - "//components/policy/core/common:unit_tests", - "//services/data_decoder:*", - "//tools/traffic_annotation/auditor:auditor_sources", - ] - if (is_win) { - visibility += [ "//components/wifi" ] - } - sources = [ - "chromium/xml_reader.cc", - "chromium/xml_reader.h", - ] - deps = [ - ":libxml", - ":libxml_utils", - ] - configs += [ ":libxml_config" ] +if (!use_cobalt_customizations) { # exclude libraries "xml_reader" and "xml_writer" +static_library("xml_reader") { + # Do not expand this visibility list without first consulting with the + # Security Team. 
+ visibility = [ + "//base/test:test_support", + "//components/policy/core/common:unit_tests", + "//services/data_decoder:*", + "//tools/traffic_annotation/auditor:auditor_sources", + ] + if (is_win) { + visibility += [ "//components/wifi" ] } + sources = [ + "chromium/xml_reader.cc", + "chromium/xml_reader.h", + ] + deps = [ + ":libxml", + ":libxml_utils", + ] + configs += [ ":libxml_config" ] +} - static_library("xml_writer") { - # The XmlWriter is considered safe to use from any target. - visibility = [ "*" ] - sources = [ - "chromium/xml_writer.cc", - "chromium/xml_writer.h", - ] - deps = [ - ":libxml", - ":libxml_utils", - ] - configs += [ ":libxml_config" ] - } +static_library("xml_writer") { + # The XmlWriter is considered safe to use from any target. + visibility = [ "*" ] + sources = [ + "chromium/xml_writer.cc", + "chromium/xml_writer.h", + ] + deps = [ + ":libxml", + ":libxml_utils", + ] + configs += [ ":libxml_config" ] } +} # exclude libraries "xml_reader" and "xml_writer" static_library("libxml_utils") { # Do not expand this visibility list without first consulting with the @@ -144,15 +145,17 @@ static_library("libxml") { # Do not expand this visibility list without first consulting with the # Security Team. visibility = [ + ":libxml_utils", ":xml_reader", ":xml_writer", - ":libxml_utils", + "//chromecast/internal", "//testing/libfuzzer/*", "//third_party/blink/renderer/*", "//third_party/fontconfig", "//third_party/libxslt", + "//third_party/maldoca/*", ] - if (is_starboard) { + if (use_cobalt_customizations) { visibility += [ "//base/test:test_support", "//cobalt/base", @@ -172,11 +175,6 @@ static_library("libxml") { # Commented out sources are libxml2 files we do not want to include. They are # here to make it easy to identify files which are new. 
sources = [ - "linux/config.h", - "linux/include/libxml/xmlversion.h", - "mac/config.h", - "mac/include/libxml/xmlversion.h", - #"src/DOCBparser.c", "src/HTMLparser.c", "src/HTMLtree.c", @@ -245,8 +243,6 @@ static_library("libxml") { "src/include/libxml/xpath.h", "src/include/libxml/xpathInternals.h", "src/include/libxml/xpointer.h", - "src/include/win32config.h", - "src/include/wsockcompat.h", #"src/legacy.c", "src/libxml.h", @@ -297,13 +293,11 @@ static_library("libxml") { "src/xpointer.c", #"src/xzlib.c", "src/xzlib.h", - "win32/config.h", - "win32/include/libxml/xmlversion.h", ] - if (!is_starboard) { - configs -= [ "//build/config/compiler:chromium_code" ] - } +if (!use_cobalt_customizations) { + configs -= [ "//build/config/compiler:chromium_code" ] +} configs += [ "//build/config/compiler:no_chromium_code", @@ -311,7 +305,28 @@ static_library("libxml") { ":libxml_warnings", ] + if (is_linux || is_chromeos) { + sources += [ + "linux/config.h", + "linux/include/libxml/xmlversion.h", + ] + } + + if (is_mac) { + sources += [ + "mac/config.h", + "mac/include/libxml/xmlversion.h", + ] + } + if (is_win) { + sources += [ + "src/include/win32config.h", + "src/include/wsockcompat.h", + "win32/config.h", + "win32/include/libxml/xmlversion.h", + ] + # libxml2 already defines WIN32_LEAN_AND_MEAN. configs -= [ "//build/config/win:lean_and_mean" ] } @@ -320,7 +335,7 @@ static_library("libxml") { public_deps = [ "//third_party/icu:icuuc" ] deps = [ "//third_party/zlib" ] - if (is_mac || is_ios || is_android || is_fuchsia) { + if (is_apple || is_android || is_fuchsia) { # http://www.xmlsoft.org/threads.html says that this is required when using # libxml from several threads, which can possibly happen in chrome. 
On # linux, this is picked up by transitivity from pkg-config output from diff --git a/third_party/libxml/DIR_METADATA b/third_party/libxml/DIR_METADATA new file mode 100644 index 000000000000..a0e30b3eb068 --- /dev/null +++ b/third_party/libxml/DIR_METADATA @@ -0,0 +1,3 @@ +monorail: { + component: "Blink>XML" +} diff --git a/third_party/libxml/METADATA b/third_party/libxml/METADATA index 230f364e1836..387381a2b753 100644 --- a/third_party/libxml/METADATA +++ b/third_party/libxml/METADATA @@ -5,15 +5,13 @@ description: third_party { url { type: GIT - value: "https://chromium.googlesource.com/chromium/src/third_party/libxml" + value: "https://chromium.googlesource.com/chromium/src" } - # NOTE: This is @v2.9.10 release tag, which does not match the repo - # version exactly, because of various intermediate Chromium changes - version: "405e251c" + version: "96.0.4664.153" last_upgrade_date { - year: 2020 - month: 5 - day: 1 + year: 2023 + month: 6 + day: 27 } license_type: NOTICE } diff --git a/third_party/libxml/OWNERS b/third_party/libxml/OWNERS index 494e9ef8a116..536598c59280 100644 --- a/third_party/libxml/OWNERS +++ b/third_party/libxml/OWNERS @@ -2,6 +2,4 @@ # a CL to add yourself here. 
dcheng@chromium.org schenney@chromium.org -palmer@chromium.org - -# COMPONENT: Blink>XML +jarhar@chromium.org diff --git a/third_party/libxml/README.chromium b/third_party/libxml/README.chromium index 638320e99fbe..1d1d773b6dfd 100644 --- a/third_party/libxml/README.chromium +++ b/third_party/libxml/README.chromium @@ -1,7 +1,7 @@ Name: libxml URL: http://xmlsoft.org -Version: e4fb36841800038c289997432ca547c9bfef9db1 -CPEPrefix: cpe:/a:xmlsoft:libxml2:2.9.9 +Version: dea91c97debeac7c1aaf9c19f79029809e23a353 +CPEPrefix: cpe:/a:xmlsoft:libxml2:2.9.12 License: MIT License File: src/Copyright Security Critical: yes @@ -20,10 +20,8 @@ Modifications: See https://crbug.com/708433 - libxml2-2.9.4-security-xpath-nodetab-uaf.patch: See https://crbug.com/705445 - chromium-issue-708434.patch: Guard against input counter overflow. -- revert-non-recursive-xml-parsing.patch: Making parts of the XML parser - non-recursive broke a few web platform tests. -- add-missing-ifdef-in-xml-reader.patch: Bug fix forgot to guard functionality - with appropriate #ifdef. +- chromium-issue-1138555.patch: Change TRUE to 1 for ICU68, which removes the + #define of TRUE. - Add helper classes in the chromium/ subdirectory.
- Delete various unused files, see chromium/roll.py diff --git a/third_party/libxml/chromium/add-missing-ifdef-in-xml-reader.patch b/third_party/libxml/chromium/add-missing-ifdef-in-xml-reader.patch deleted file mode 100644 index 17cd15a6b9eb..000000000000 --- a/third_party/libxml/chromium/add-missing-ifdef-in-xml-reader.patch +++ /dev/null @@ -1,13 +0,0 @@ ---- a/xmlreader.c -+++ b/xmlreader.c -@@ -2262,8 +2262,10 @@ xmlFreeTextReader(xmlTextReaderPtr reader) { - reader->dict = NULL; - if ((reader->ctxt->vctxt.vstateTab != NULL) && - (reader->ctxt->vctxt.vstateMax > 0)){ -+#ifdef LIBXML_REGEXP_ENABLED - while (reader->ctxt->vctxt.vstateNr > 0) - xmlValidatePopElement(&reader->ctxt->vctxt, NULL, NULL, NULL); -+#endif /* LIBXML_REGEXP_ENABLED */ - xmlFree(reader->ctxt->vctxt.vstateTab); - reader->ctxt->vctxt.vstateTab = NULL; - reader->ctxt->vctxt.vstateMax = 0; diff --git a/third_party/libxml/chromium/revert-non-recursive-xml-parsing.patch b/third_party/libxml/chromium/revert-non-recursive-xml-parsing.patch deleted file mode 100644 index e81c7c2dcf04..000000000000 --- a/third_party/libxml/chromium/revert-non-recursive-xml-parsing.patch +++ /dev/null @@ -1,268 +0,0 @@ -diff --git a/parser.c b/parser.c -index 072eb22d..f863edd1 100644 ---- a/parser.c -+++ b/parser.c -@@ -96,12 +96,6 @@ xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID, - - static void xmlHaltParser(xmlParserCtxtPtr ctxt); - --static int --xmlParseElementStart(xmlParserCtxtPtr ctxt); -- --static void --xmlParseElementEnd(xmlParserCtxtPtr ctxt); -- - /************************************************************************ - * * - * Arbitrary limits set in the parser. 
See XML_PARSE_HUGE * -@@ -1828,6 +1822,7 @@ nodePop(xmlParserCtxtPtr ctxt) - return (ret); - } - -+#ifdef LIBXML_PUSH_ENABLED - /** - * nameNsPush: - * @ctxt: an XML parser context -@@ -1863,11 +1858,6 @@ nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value, - goto mem_error; - } - ctxt->pushTab = tmp2; -- } else if (ctxt->pushTab == NULL) { -- ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * -- sizeof(ctxt->pushTab[0])); -- if (ctxt->pushTab == NULL) -- goto mem_error; - } - ctxt->nameTab[ctxt->nameNr] = value; - ctxt->name = value; -@@ -1879,7 +1869,6 @@ mem_error: - xmlErrMemory(ctxt, NULL); - return (-1); - } --#ifdef LIBXML_PUSH_ENABLED - /** - * nameNsPop: - * @ctxt: an XML parser context -@@ -9812,10 +9801,9 @@ xmlParseCDSect(xmlParserCtxtPtr ctxt) { - - void - xmlParseContent(xmlParserCtxtPtr ctxt) { -- int nameNr = ctxt->nameNr; -- - GROW; - while ((RAW != 0) && -+ ((RAW != '<') || (NXT(1) != '/')) && - (ctxt->instate != XML_PARSER_EOF)) { - const xmlChar *test = CUR_PTR; - unsigned int cons = ctxt->input->consumed; -@@ -9849,13 +9837,7 @@ xmlParseContent(xmlParserCtxtPtr ctxt) { - * Fourth case : a sub-element. 
- */ - else if (*cur == '<') { -- if (NXT(1) == '/') { -- if (ctxt->nameNr <= nameNr) -- break; -- xmlParseElementEnd(ctxt); -- } else { -- xmlParseElementStart(ctxt); -- } -+ xmlParseElement(ctxt); - } - - /* -@@ -9890,7 +9872,7 @@ xmlParseContent(xmlParserCtxtPtr ctxt) { - * xmlParseElement: - * @ctxt: an XML parser context - * -- * parse an XML element -+ * parse an XML element, this is highly recursive - * - * [39] element ::= EmptyElemTag | STag content ETag - * -@@ -9902,23 +9884,6 @@ xmlParseContent(xmlParserCtxtPtr ctxt) { - - void - xmlParseElement(xmlParserCtxtPtr ctxt) { -- if (xmlParseElementStart(ctxt) != 0) -- return; -- xmlParseContent(ctxt); -- if (ctxt->instate == XML_PARSER_EOF) -- return; -- xmlParseElementEnd(ctxt); --} -- --/** -- * xmlParseElementStart: -- * @ctxt: an XML parser context -- * -- * Parse the start of an XML element. Returns -1 in case of error, 0 if an -- * opening tag was parsed, 1 if an empty element was parsed. -- */ --static int --xmlParseElementStart(xmlParserCtxtPtr ctxt) { - const xmlChar *name; - const xmlChar *prefix = NULL; - const xmlChar *URI = NULL; -@@ -9933,7 +9898,7 @@ xmlParseElementStart(xmlParserCtxtPtr ctxt) { - "Excessive depth in document: %d use XML_PARSE_HUGE option\n", - xmlParserMaxDepth); - xmlHaltParser(ctxt); -- return(-1); -+ return; - } - - /* Capture start position */ -@@ -9960,17 +9925,12 @@ xmlParseElementStart(xmlParserCtxtPtr ctxt) { - name = xmlParseStartTag(ctxt); - #endif /* LIBXML_SAX1_ENABLED */ - if (ctxt->instate == XML_PARSER_EOF) -- return(-1); -+ return; - if (name == NULL) { - spacePop(ctxt); -- return(-1); -+ return; - } -- if (ctxt->sax2) -- nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr); --#ifdef LIBXML_SAX1_ENABLED -- else -- namePush(ctxt, name); --#endif /* LIBXML_SAX1_ENABLED */ -+ namePush(ctxt, name); - ret = ctxt->node; - - #ifdef LIBXML_VALID_ENABLED -@@ -10011,7 +9971,7 @@ xmlParseElementStart(xmlParserCtxtPtr ctxt) { - node_info.node = ret; - 
xmlParserAddNodeInfo(ctxt, &node_info); - } -- return(1); -+ return; - } - if (RAW == '>') { - NEXT1; -@@ -10039,39 +9999,41 @@ xmlParseElementStart(xmlParserCtxtPtr ctxt) { - node_info.node = ret; - xmlParserAddNodeInfo(ctxt, &node_info); - } -- return(-1); -+ return; - } - -- return(0); --} -- --/** -- * xmlParseElementEnd: -- * @ctxt: an XML parser context -- * -- * Parse the end of an XML element. -- */ --static void --xmlParseElementEnd(xmlParserCtxtPtr ctxt) { -- xmlParserNodeInfo node_info; -- xmlNodePtr ret = ctxt->node; -+ /* -+ * Parse the content of the element: -+ */ -+ xmlParseContent(ctxt); -+ if (ctxt->instate == XML_PARSER_EOF) -+ return; -+ if (!IS_BYTE_CHAR(RAW)) { -+ xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED, -+ "Premature end of data in tag %s line %d\n", -+ name, line, NULL); - -- if (ctxt->nameNr <= 0) -- return; -+ /* -+ * end of parsing of this node. -+ */ -+ nodePop(ctxt); -+ namePop(ctxt); -+ spacePop(ctxt); -+ if (nsNr != ctxt->nsNr) -+ nsPop(ctxt, ctxt->nsNr - nsNr); -+ return; -+ } - - /* - * parse the end of tag: '</' should be here. - */ - if (ctxt->sax2) { -- const xmlChar *prefix = ctxt->pushTab[ctxt->nameNr * 3 - 3]; -- const xmlChar *URI = ctxt->pushTab[ctxt->nameNr * 3 - 2]; -- int nsNr = (ptrdiff_t) ctxt->pushTab[ctxt->nameNr * 3 - 1]; -- xmlParseEndTag2(ctxt, prefix, URI, 0, nsNr, 0); -+ xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen); - namePop(ctxt); - } - #ifdef LIBXML_SAX1_ENABLED -- else -- xmlParseEndTag1(ctxt, 0); -+ else -+ xmlParseEndTag1(ctxt, line); - #endif /* LIBXML_SAX1_ENABLED */ - - /* -@@ -12388,6 +12350,13 @@ xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data, - return(NULL); - } - ctxt->dictNames = 1; -+ ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *)); -+ if (ctxt->pushTab == NULL) { -+ xmlErrMemory(ctxt, NULL); -+ xmlFreeParserInputBuffer(buf); -+ xmlFreeParserCtxt(ctxt); -+ return(NULL); -+ } - if (sax != NULL) { - #ifdef LIBXML_SAX1_ENABLED - if (ctxt->sax !=
(xmlSAXHandlerPtr) &xmlDefaultSAXHandler) -@@ -14835,6 +14804,16 @@ xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk, - - xmlCtxtReset(ctxt); - -+ if (ctxt->pushTab == NULL) { -+ ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * -+ sizeof(xmlChar *)); -+ if (ctxt->pushTab == NULL) { -+ xmlErrMemory(ctxt, NULL); -+ xmlFreeParserInputBuffer(buf); -+ return(1); -+ } -+ } -+ - if (filename == NULL) { - ctxt->directory = NULL; - } else { -diff --git a/result/errors/754947.xml.err b/result/errors/754947.xml.err -index 51e9b4ed..f45cb5a2 100644 ---- a/result/errors/754947.xml.err -+++ b/result/errors/754947.xml.err -@@ -2,6 +2,6 @@ - Bytes: 0xEE 0x5D 0x5D 0x3E - - ^ --./test/errors/754947.xml:1: parser error : EndTag: ' - ^ -diff --git a/result/errors/759398.xml.err b/result/errors/759398.xml.err -index bc9e5e03..f6036a3b 100644 ---- a/result/errors/759398.xml.err -+++ b/result/errors/759398.xml.err -@@ -1,10 +1,10 @@ - ./test/errors/759398.xml:210: parser error : StartTag: invalid element name - need to worry about parsers whi - ^ - ./test/errors/759398.xml:316: parser error : Extra content at the end of the document diff --git a/third_party/libxml/chromium/roll.py b/third_party/libxml/chromium/roll.py index a75c3bbae72e..ef23d4c3a7b4 100755 --- a/third_party/libxml/chromium/roll.py +++ b/third_party/libxml/chromium/roll.py @@ -34,12 +34,6 @@ # 3. On Mac, install these packages with brew: # autoconf automake libtool pkgconfig icu4c # -# Important! Before running roll.py, set these environmental variables so the -# configure script can find ICU: -# export LDFLAGS="-L/path/to/homebrew/opt/icu4c/lib" -# export CPPFLAGS="-I/path/to/homebrew/opt/icu4c/include" -# export PKG_CONFIG_PATH="/path/to/homebrew/opt/icu4c/lib/pkgconfig" -# # Procedure: # # Warning: This process is destructive. Run it on a clean branch. @@ -66,20 +60,16 @@ # # 3. On Mac, in the Chromium src directory: # a. git cl patch -# b. third_party/libxml/chromium/roll.py --mac +# b. 
third_party/libxml/chromium/roll.py --mac --icu4c_path=~/homebrew/opt/icu4c # c. Make and commit any final changes to README.chromium, BUILD.gn, etc. # d. git cl upload # e. Complete the review as usual PATCHES = [ - # TODO(dcheng): reach out upstream to see what's going on here. - 'revert-non-recursive-xml-parsing.patch', 'chromium-issue-599427.patch', 'chromium-issue-628581.patch', 'libxml2-2.9.4-security-xpath-nodetab-uaf.patch', 'chromium-issue-708434.patch', - # TODO(dcheng): Merge this back upstream. - 'add-missing-ifdef-in-xml-reader.patch', ] @@ -405,17 +395,22 @@ def roll_libxml_win32(src_path): XML_WIN32_CONFIGURE_OPTIONS) # Add and commit the result. - shutil.move('VC10/config.h', '../../win32/config.h') + shutil.move('../config.h', '../../win32/config.h') git('add', '../../win32/config.h') shutil.move('../include/libxml/xmlversion.h', '../../win32/include/libxml/xmlversion.h') git('add', '../../win32/include/libxml/xmlversion.h') - git('commit', '-m', 'Windows') + git('commit', '--allow-empty', '-m', 'Windows') git('clean', '-f') print('Now push to Mac and run steps there.') -def roll_libxml_mac(src_path): +def roll_libxml_mac(src_path, icu4c_path): + icu4c_path = os.path.abspath(os.path.expanduser(icu4c_path)) + os.environ["LDFLAGS"] = "-L" + os.path.join(icu4c_path, 'lib') + os.environ["CPPFLAGS"] = "-I" + os.path.join(icu4c_path, 'include') + os.environ["PKG_CONFIG_PATH"] = os.path.join(icu4c_path, 'lib/pkgconfig') + full_path_to_third_party_libxml = os.path.join( src_path, THIRD_PARTY_LIBXML_SRC, '..') @@ -459,6 +454,9 @@ def main(): type=str, nargs='?', help='The path to the local clone of the libxml2 git repo.') + parser.add_argument( + '--icu4c_path', + help='The path to the homebrew installation of icu4c.') args = parser.parse_args() if args.linux: @@ -471,7 +469,12 @@ def main(): elif args.win32: roll_libxml_win32(src_dir) elif args.mac: - roll_libxml_mac(src_dir) + icu4c_path = args.icu4c_path + if not icu4c_path: + print('Specify the path to 
the homebrew installation of icu4c with --icu4c_path.') + print(' ex: roll.py --mac --icu4c_path=~/homebrew/opt/icu4c') + sys.exit(1) + roll_libxml_mac(src_dir, icu4c_path) if __name__ == '__main__': diff --git a/third_party/libxml/linux/config.h b/third_party/libxml/linux/config.h index e3342408501b..25172b55031d 100644 --- a/third_party/libxml/linux/config.h +++ b/third_party/libxml/linux/config.h @@ -1,6 +1,9 @@ /* config.h. Generated from config.h.in by configure. */ /* config.h.in. Generated from configure.ac by autoheader. */ +/* A form that will not confuse apibuild.py */ +#define ATTRIBUTE_DESTRUCTOR __attribute__((destructor)) + /* Type cast for the gethostbyname() argument */ #define GETHOSTBYNAME_ARG_CAST /**/ @@ -10,6 +13,9 @@ /* Define to 1 if you have the <arpa/nameser.h> header file. */ #define HAVE_ARPA_NAMESER_H 1 +/* Define if __attribute__((destructor)) is accepted */ +#define HAVE_ATTRIBUTE_DESTRUCTOR 1 + /* Whether struct sockaddr::__ss_family exists */ /* #undef HAVE_BROKEN_SS_FAMILY */ @@ -240,19 +246,19 @@ #define PACKAGE_BUGREPORT "" /* Define to the full name of this package. */ -#define PACKAGE_NAME "" +#define PACKAGE_NAME "libxml2" /* Define to the full name and version of this package. */ -#define PACKAGE_STRING "" +#define PACKAGE_STRING "libxml2 2.9.12" /* Define to the one symbol short name of this package. */ -#define PACKAGE_TARNAME "" +#define PACKAGE_TARNAME "libxml2" /* Define to the home page for this package. */ #define PACKAGE_URL "" /* Define to the version of this package.
*/ -#define PACKAGE_VERSION "" +#define PACKAGE_VERSION "2.9.12" /* Type cast for the send() function 2nd arg */ #define SEND_ARG2_CAST /**/ @@ -267,7 +273,7 @@ #define VA_LIST_IS_ARRAY 1 /* Version number of package */ -#define VERSION "2.9.10" +#define VERSION "2.9.12" /* Determine what socket length (socklen_t) data type is */ #define XML_SOCKLEN_T socklen_t diff --git a/third_party/libxml/linux/include/libxml/xmlversion.h b/third_party/libxml/linux/include/libxml/xmlversion.h index 37f673e252f4..4c364e640448 100644 --- a/third_party/libxml/linux/include/libxml/xmlversion.h +++ b/third_party/libxml/linux/include/libxml/xmlversion.h @@ -29,21 +29,21 @@ XMLPUBFUN void XMLCALL xmlCheckVersion(int version); * * the version string like "1.2.3" */ -#define LIBXML_DOTTED_VERSION "2.9.10" +#define LIBXML_DOTTED_VERSION "2.9.12" /** * LIBXML_VERSION: * * the version number: 1.2.3 value is 10203 */ -#define LIBXML_VERSION 20910 +#define LIBXML_VERSION 20912 /** * LIBXML_VERSION_STRING: * * the version number string, 1.2.3 value is "10203" */ -#define LIBXML_VERSION_STRING "20910" +#define LIBXML_VERSION_STRING "20912" /** * LIBXML_VERSION_EXTRA: @@ -58,7 +58,7 @@ XMLPUBFUN void XMLCALL xmlCheckVersion(int version); * Macro to check that the libxml version in use is compatible with * the version the software has been compiled against */ -#define LIBXML_TEST_VERSION xmlCheckVersion(20910); +#define LIBXML_TEST_VERSION xmlCheckVersion(20912); #ifndef VMS #if 0 diff --git a/third_party/libxml/linux/xml2-config b/third_party/libxml/linux/xml2-config index 7a271d4ec7f9..3717ff94744b 100755 --- a/third_party/libxml/linux/xml2-config +++ b/third_party/libxml/linux/xml2-config @@ -58,7 +58,7 @@ while test $# -gt 0; do ;; --version) - echo 2.9.10 + echo 2.9.12 exit 0 ;; diff --git a/third_party/libxml/mac/config.h b/third_party/libxml/mac/config.h index 16cd0ae30f21..27a79ec1716a 100644 --- a/third_party/libxml/mac/config.h +++ b/third_party/libxml/mac/config.h @@ -1,6 +1,9 @@ /* 
config.h. Generated from config.h.in by configure. */ /* config.h.in. Generated from configure.ac by autoheader. */ +/* A form that will not confuse apibuild.py */ +#define ATTRIBUTE_DESTRUCTOR __attribute__((destructor)) + /* Type cast for the gethostbyname() argument */ #define GETHOSTBYNAME_ARG_CAST /**/ @@ -10,6 +13,9 @@ /* Define to 1 if you have the <arpa/nameser.h> header file. */ #define HAVE_ARPA_NAMESER_H 1 +/* Define if __attribute__((destructor)) is accepted */ +#define HAVE_ATTRIBUTE_DESTRUCTOR 1 + /* Whether struct sockaddr::__ss_family exists */ /* #undef HAVE_BROKEN_SS_FAMILY */ @@ -85,9 +91,6 @@ /* Define to 1 if you have the <math.h> header file. */ #define HAVE_MATH_H 1 -/* Define to 1 if you have the <memory.h> header file. */ -#define HAVE_MEMORY_H 1 - /* Define to 1 if you have the `mmap' function. */ #define HAVE_MMAP 1 @@ -159,6 +162,9 @@ /* Define to 1 if you have the <stdint.h> header file. */ #define HAVE_STDINT_H 1 +/* Define to 1 if you have the <stdio.h> header file. */ +#define HAVE_STDIO_H 1 + /* Define to 1 if you have the <stdlib.h> header file. */ #define HAVE_STDLIB_H 1 @@ -240,24 +246,26 @@ #define PACKAGE_BUGREPORT "" /* Define to the full name of this package. */ -#define PACKAGE_NAME "" +#define PACKAGE_NAME "libxml2" /* Define to the full name and version of this package. */ -#define PACKAGE_STRING "" +#define PACKAGE_STRING "libxml2 2.9.12" /* Define to the one symbol short name of this package. */ -#define PACKAGE_TARNAME "" +#define PACKAGE_TARNAME "libxml2" /* Define to the home page for this package. */ #define PACKAGE_URL "" /* Define to the version of this package. */ -#define PACKAGE_VERSION "" +#define PACKAGE_VERSION "2.9.12" /* Type cast for the send() function 2nd arg */ #define SEND_ARG2_CAST /**/ -/* Define to 1 if you have the ANSI C header files. */ +/* Define to 1 if all of the C90 standard headers exist (not just the ones + required in a freestanding environment). This macro is provided for + backward compatibility; new code need not use it.
*/ #define STDC_HEADERS 1 /* Support for IPv6 */ @@ -267,7 +275,7 @@ #define VA_LIST_IS_ARRAY 1 /* Version number of package */ -#define VERSION "2.9.10" +#define VERSION "2.9.12" /* Determine what socket length (socklen_t) data type is */ #define XML_SOCKLEN_T socklen_t diff --git a/third_party/libxml/mac/include/libxml/xmlversion.h b/third_party/libxml/mac/include/libxml/xmlversion.h index 37f673e252f4..4c364e640448 100644 --- a/third_party/libxml/mac/include/libxml/xmlversion.h +++ b/third_party/libxml/mac/include/libxml/xmlversion.h @@ -29,21 +29,21 @@ XMLPUBFUN void XMLCALL xmlCheckVersion(int version); * * the version string like "1.2.3" */ -#define LIBXML_DOTTED_VERSION "2.9.10" +#define LIBXML_DOTTED_VERSION "2.9.12" /** * LIBXML_VERSION: * * the version number: 1.2.3 value is 10203 */ -#define LIBXML_VERSION 20910 +#define LIBXML_VERSION 20912 /** * LIBXML_VERSION_STRING: * * the version number string, 1.2.3 value is "10203" */ -#define LIBXML_VERSION_STRING "20910" +#define LIBXML_VERSION_STRING "20912" /** * LIBXML_VERSION_EXTRA: @@ -58,7 +58,7 @@ XMLPUBFUN void XMLCALL xmlCheckVersion(int version); * Macro to check that the libxml version in use is compatible with * the version the software has been compiled against */ -#define LIBXML_TEST_VERSION xmlCheckVersion(20910); +#define LIBXML_TEST_VERSION xmlCheckVersion(20912); #ifndef VMS #if 0 diff --git a/third_party/libxml/src/CMakeLists.txt b/third_party/libxml/src/CMakeLists.txt new file mode 100644 index 000000000000..073869fc0c4e --- /dev/null +++ b/third_party/libxml/src/CMakeLists.txt @@ -0,0 +1,772 @@ +cmake_minimum_required(VERSION 3.15) + +file(STRINGS "configure.ac" CONFIGURE_AC_LINES) +foreach(line ${CONFIGURE_AC_LINES}) + if(line MATCHES [[^m4_define\(\[(MAJOR_VERSION|MINOR_VERSION|MICRO_VERSION)\],[ \t]*([0-9]+)\)$]]) + set(LIBXML_${CMAKE_MATCH_1} ${CMAKE_MATCH_2}) + elseif(line MATCHES "^(LIBXML_MAJOR_VERSION|LIBXML_MINOR_VERSION|LIBXML_MICRO_VERSION)=([0-9]+)$") + set(${CMAKE_MATCH_1} 
${CMAKE_MATCH_2}) + endif() +endforeach() +set(VERSION "${LIBXML_MAJOR_VERSION}.${LIBXML_MINOR_VERSION}.${LIBXML_MICRO_VERSION}") + +project(libxml2 VERSION ${VERSION} LANGUAGES C) + +include(CheckCSourceCompiles) +include(CheckFunctionExists) +include(CheckIncludeFiles) +include(CheckLibraryExists) +include(CheckStructHasMember) +include(CheckSymbolExists) +include(CMakePackageConfigHelpers) +include(GNUInstallDirs) + +option(BUILD_SHARED_LIBS "Build shared libraries" ON) +set(LIBXML2_WITH_AUTOMATA ON) +option(LIBXML2_WITH_C14N "Add the Canonicalization support" ON) +option(LIBXML2_WITH_CATALOG "Add the Catalog support" ON) +option(LIBXML2_WITH_DEBUG "Add the debugging module" ON) +option(LIBXML2_WITH_DOCB "Add Docbook SGML support" ON) +set(LIBXML2_WITH_EXPR ON) +option(LIBXML2_WITH_FTP "Add the FTP support" ON) +option(LIBXML2_WITH_HTML "Add the HTML support" ON) +option(LIBXML2_WITH_HTTP "Add the HTTP support" ON) +option(LIBXML2_WITH_ICONV "Add ICONV support" ON) +option(LIBXML2_WITH_ICU "Add ICU support" OFF) +option(LIBXML2_WITH_ISO8859X "Add ISO8859X support if no iconv" ON) +option(LIBXML2_WITH_LEGACY "Add deprecated APIs for compatibility" ON) +option(LIBXML2_WITH_LZMA "Use liblzma" ON) +option(LIBXML2_WITH_MEM_DEBUG "Add the memory debugging module" OFF) +option(LIBXML2_WITH_MODULES "Add the dynamic modules support" ON) +option(LIBXML2_WITH_OUTPUT "Add the serialization support" ON) +option(LIBXML2_WITH_PATTERN "Add the xmlPattern selection interface" ON) +option(LIBXML2_WITH_PROGRAMS "Build programs" ON) +option(LIBXML2_WITH_PUSH "Add the PUSH parser interfaces" ON) +option(LIBXML2_WITH_PYTHON "Build Python bindings" ON) +option(LIBXML2_WITH_READER "Add the xmlReader parsing interface" ON) +option(LIBXML2_WITH_REGEXPS "Add Regular Expressions support" ON) +option(LIBXML2_WITH_RUN_DEBUG "Add the runtime debugging module" OFF) +option(LIBXML2_WITH_SAX1 "Add the older SAX1 interface" ON) +option(LIBXML2_WITH_SCHEMAS "Add Relax-NG and Schemas support" ON) 
+option(LIBXML2_WITH_SCHEMATRON "Add Schematron support" ON) +option(LIBXML2_WITH_TESTS "Build tests" ON) +option(LIBXML2_WITH_THREADS "Add multithread support" ON) +option(LIBXML2_WITH_THREAD_ALLOC "Add per-thread memory" OFF) +option(LIBXML2_WITH_TREE "Add the DOM like tree manipulation APIs" ON) +set(LIBXML2_WITH_TRIO OFF) +set(LIBXML2_WITH_UNICODE ON) +option(LIBXML2_WITH_VALID "Add the DTD validation support" ON) +option(LIBXML2_WITH_WRITER "Add the xmlWriter saving interface" ON) +option(LIBXML2_WITH_XINCLUDE "Add the XInclude support" ON) +option(LIBXML2_WITH_XPATH "Add the XPATH support" ON) +option(LIBXML2_WITH_XPTR "Add the XPointer support" ON) +option(LIBXML2_WITH_ZLIB "Use libz" ON) +set(LIBXML2_XMLCONF_WORKING_DIR ${CMAKE_CURRENT_BINARY_DIR} CACHE PATH "Working directory for XML Conformance Test Suite") + +if(LIBXML2_WITH_ICONV) + find_package(Iconv REQUIRED) +endif() + +if(LIBXML2_WITH_ICU) + find_package(ICU REQUIRED COMPONENTS data i18n uc) +endif() + +if(LIBXML2_WITH_LZMA) + find_package(LibLZMA REQUIRED) +endif() + +if(LIBXML2_WITH_PYTHON) + check_include_files(unistd.h HAVE_UNISTD_H) + check_symbol_exists(F_GETFL fcntl.h HAVE_F_GETFL) + if(HAVE_UNISTD_H AND HAVE_F_GETFL) + find_package(Python COMPONENTS Interpreter Development REQUIRED) + else() + find_package(Python2 COMPONENTS Interpreter Development REQUIRED) + add_library(Python::Python ALIAS Python2::Python) + set(Python_EXECUTABLE ${Python2_EXECUTABLE}) + set(Python_SITEARCH ${Python2_SITEARCH}) + endif() + set(LIBXML2_PYTHON_INSTALL_DIR ${Python_SITEARCH} CACHE PATH "Python bindings install directory") +endif() + +if(LIBXML2_WITH_THREADS) + find_package(Threads REQUIRED) +endif() + +if(LIBXML2_WITH_ZLIB) + find_package(ZLIB REQUIRED) +endif() + +foreach(VARIABLE IN ITEMS WITH_AUTOMATA WITH_C14N WITH_CATALOG WITH_DEBUG WITH_DOCB WITH_EXPR WITH_FTP WITH_HTML WITH_HTTP WITH_ICONV WITH_ICU WITH_ISO8859X WITH_LEGACY WITH_LZMA WITH_MEM_DEBUG WITH_MODULES WITH_OUTPUT WITH_PATTERN WITH_PUSH 
WITH_READER WITH_REGEXPS WITH_RUN_DEBUG WITH_SAX1 WITH_SCHEMAS WITH_SCHEMATRON WITH_THREADS WITH_THREAD_ALLOC WITH_TREE WITH_TRIO WITH_UNICODE WITH_VALID WITH_WRITER WITH_XINCLUDE WITH_XPATH WITH_XPTR WITH_ZLIB) + if(LIBXML2_${VARIABLE}) + set(${VARIABLE} 1) + else() + set(${VARIABLE} 0) + endif() +endforeach() + +set(LIBXML_VERSION ${LIBXML_MAJOR_VERSION}0${LIBXML_MINOR_VERSION}0${LIBXML_MICRO_VERSION}) +set(LIBXML_VERSION_STRING "${LIBXML_VERSION}") +set(LIBXML_VERSION_EXTRA "") +set(LIBXML_VERSION_NUMBER ${LIBXML_VERSION}) + +set(MODULE_EXTENSION "${CMAKE_SHARED_LIBRARY_SUFFIX}") + +set(PACKAGE "libxml2") +set(PACKAGE_BUGREPORT "xml@gnome.org") +set(PACKAGE_NAME "libxml2") +set(PACKAGE_STRING "libxml2 ${VERSION}") +set(PACKAGE_TARNAME "libxml2") +set(PACKAGE_URL "http://www.xmlsoft.org/") +set(PACKAGE_VERSION ${VERSION}) + +if(LIBLZMA_FOUND) + list(APPEND CMAKE_REQUIRED_LIBRARIES LibLZMA::LibLZMA) +endif() + +if(Threads_FOUND) + list(APPEND CMAKE_REQUIRED_LIBRARIES Threads::Threads) +endif() + +if(ZLIB_FOUND) + list(APPEND CMAKE_REQUIRED_LIBRARIES ZLIB::ZLIB) +endif() + +if(MSVC) + configure_file(include/win32config.h config.h COPYONLY) +else() + check_c_source_compiles(" + void __attribute__((destructor)) + f(void) {} + int main(void) { return 0; } + " ATTRIBUTE_DESTRUCTOR) + check_c_source_compiles(" + #include + int main() { (void) gethostbyname((const char*) \"\"); return 0; } + " GETHOSTBYNAME_ARG_CAST_CONST) + if(NOT GETHOSTBYNAME_ARG_CAST_CONST) + set(GETHOSTBYNAME_ARG_CAST "(char *)") + else() + set(GETHOSTBYNAME_ARG_CAST "/**/") + endif() + check_include_files(arpa/inet.h HAVE_ARPA_INET_H) + check_include_files(arpa/nameser.h HAVE_ARPA_NAMESER_H) + check_struct_has_member("struct sockaddr_storage" ss_family "sys/socket.h;sys/types.h" HAVE_SS_FAMILY) + check_struct_has_member("struct sockaddr_storage" __ss_family "sys/socket.h;sys/types.h" HAVE_BROKEN_SS_FAMILY) + if(HAVE_BROKEN_SS_FAMILY) + set(ss_family __ss_family) + endif() + 
check_function_exists(class HAVE_CLASS) + check_include_files(ctype.h HAVE_CTYPE_H) + check_include_files(dirent.h HAVE_DIRENT_H) + check_include_files(dlfcn.h HAVE_DLFCN_H) + check_library_exists(dl dlopen "" HAVE_DLOPEN) + check_include_files(dl.h HAVE_DL_H) + check_include_files(errno.h HAVE_ERRNO_H) + check_include_files(fcntl.h HAVE_FCNTL_H) + check_function_exists(finite HAVE_FINITE) + check_include_files(float.h HAVE_FLOAT_H) + check_function_exists(fpclass HAVE_FPCLASS) + check_function_exists(fprintf HAVE_FPRINTF) + check_function_exists(fp_class HAVE_FP_CLASS) + check_function_exists(ftime HAVE_FTIME) + check_function_exists(getaddrinfo HAVE_GETADDRINFO) + check_function_exists(gettimeofday HAVE_GETTIMEOFDAY) + check_include_files(inttypes.h HAVE_INTTYPES_H) + check_function_exists(isascii HAVE_ISASCII) + check_function_exists(isinf HAVE_ISINF) + check_function_exists(isnan HAVE_ISNAN) + check_function_exists(isnand HAVE_ISNAND) + check_library_exists(history append_history "" HAVE_LIBHISTORY) + check_library_exists(lzma lzma_code "" HAVE_LIBLZMA) + check_library_exists(pthread pthread_join "" HAVE_LIBPTHREAD) + check_library_exists(readline readline "" HAVE_LIBREADLINE) + check_library_exists(z gzread "" HAVE_LIBZ) + check_include_files(limits.h HAVE_LIMITS_H) + check_function_exists(localtime HAVE_LOCALTIME) + check_include_files(lzma.h HAVE_LZMA_H) + check_include_files(malloc.h HAVE_MALLOC_H) + check_include_files(math.h HAVE_MATH_H) + check_include_files(memory.h HAVE_MEMORY_H) + check_function_exists(mmap HAVE_MMAP) + check_function_exists(munmap HAVE_MUNMAP) + check_symbol_exists(DIR ndir.h HAVE_NDIR_H) + check_include_files(netdb.h HAVE_NETDB_H) + check_include_files(netinet/in.h HAVE_NETINET_IN_H) + check_include_files(poll.h HAVE_POLL_H) + check_function_exists(printf HAVE_PRINTF) + check_include_files(pthread.h HAVE_PTHREAD_H) + check_function_exists(putenv HAVE_PUTENV) + check_function_exists(rand HAVE_RAND) + check_function_exists(rand_r 
HAVE_RAND_R) + check_include_files(resolv.h HAVE_RESOLV_H) + check_library_exists(dld shl_load "" HAVE_SHLLOAD) + check_function_exists(signal HAVE_SIGNAL) + check_include_files(signal.h HAVE_SIGNAL_H) + check_function_exists(snprintf HAVE_SNPRINTF) + check_function_exists(sprintf HAVE_SPRINTF) + check_function_exists(srand HAVE_SRAND) + check_function_exists(sscanf HAVE_SSCANF) + check_function_exists(stat HAVE_STAT) + check_include_files(stdarg.h HAVE_STDARG_H) + check_include_files(stdint.h HAVE_STDINT_H) + check_include_files(stdlib.h HAVE_STDLIB_H) + check_function_exists(strftime HAVE_STRFTIME) + check_include_files(strings.h HAVE_STRINGS_H) + check_include_files(string.h HAVE_STRING_H) + check_symbol_exists(DIR sys/dir.h HAVE_SYS_DIR_H) + check_include_files(sys/mman.h HAVE_SYS_MMAN_H) + check_symbol_exists(DIR sys/ndir.h HAVE_SYS_NDIR_H) + check_include_files(sys/select.h HAVE_SYS_SELECT_H) + check_include_files(sys/socket.h HAVE_SYS_SOCKET_H) + check_include_files(sys/stat.h HAVE_SYS_STAT_H) + check_include_files(sys/timeb.h HAVE_SYS_TIMEB_H) + check_include_files(sys/time.h HAVE_SYS_TIME_H) + check_include_files(sys/types.h HAVE_SYS_TYPES_H) + check_function_exists(time HAVE_TIME) + check_include_files(time.h HAVE_TIME_H) + check_include_files(unistd.h HAVE_UNISTD_H) + check_function_exists(va_copy HAVE_VA_COPY) + check_function_exists(vfprintf HAVE_VFPRINTF) + check_function_exists(vsnprintf HAVE_VSNPRINTF) + check_function_exists(vsprintf HAVE_VSPRINTF) + check_function_exists(__va_copy HAVE___VA_COPY) + check_c_source_compiles(" + #include + #include + extern + #ifdef __cplusplus + \"C\" + #endif + #if defined(__STDC__) || defined(__cplusplus) + size_t iconv(iconv_t cd, char** inbuf, size_t* inbytesleft, char** outbuf, size_t* outbytesleft); + #else + size_t iconv(); + #endif + int main() { return 0; } + " ICONV_CONST_TEST) + if(NOT ICONV_CONST_TEST) + set(ICONV_CONST "const") + endif() + set(LT_OBJDIR ".libs/") + check_c_source_compiles(" + #include + 
#include + int main() { (void) send(1, (const char*) \"\", 1, 1); return 0; } + " SEND_ARG2_CAST_CONST) + if(NOT SEND_ARG2_CAST_CONST) + set(SEND_ARG2_CAST "(char *)") + else() + set(SEND_ARG2_CAST "/**/") + endif() + check_include_files("float.h;stdarg.h;stdlib.h;string.h" STDC_HEADERS) + check_c_source_compiles(" + #include + void a(va_list* ap) {}; + int main() { va_list ap1, ap2; a(&ap1); ap2 = (va_list) ap1; return 0; } + " VA_LIST_IS_ARRAY_TEST) + if(VA_LIST_IS_ARRAY_TEST) + set(VA_LIST_IS_ARRAY FALSE) + else() + set(VA_LIST_IS_ARRAY TRUE) + endif() + check_c_source_compiles(" + #include + #include + #include + int main() { (void) getsockopt(1, 1, 1, NULL, (socklen_t*) NULL); return 0; } + " XML_SOCKLEN_T_SOCKLEN_T) + if(XML_SOCKLEN_T_SOCKLEN_T) + set(XML_SOCKLEN_T socklen_t) + else() + check_c_source_compiles(" + #include + #include + #include + int main() { (void) getsockopt(1, 1, 1, NULL, (size_t*) NULL); return 0; } + " XML_SOCKLEN_T_SIZE_T) + if(XML_SOCKLEN_T_SIZE_T) + set(XML_SOCKLEN_T size_t) + else() + check_c_source_compiles(" + #include + #include + #include + int main() { (void) getsockopt (1, 1, 1, NULL, (int*) NULL); return 0; } + " XML_SOCKLEN_T_INT) + set(XML_SOCKLEN_T int) + endif() + endif() + configure_file(config.h.cmake.in config.h) +endif() + +set( + LIBXML2_HDRS + include/libxml/c14n.h + include/libxml/catalog.h + include/libxml/chvalid.h + include/libxml/debugXML.h + include/libxml/dict.h + include/libxml/DOCBparser.h + include/libxml/encoding.h + include/libxml/entities.h + include/libxml/globals.h + include/libxml/hash.h + include/libxml/HTMLparser.h + include/libxml/HTMLtree.h + include/libxml/list.h + include/libxml/nanoftp.h + include/libxml/nanohttp.h + include/libxml/parser.h + include/libxml/parserInternals.h + include/libxml/pattern.h + include/libxml/relaxng.h + include/libxml/SAX.h + include/libxml/SAX2.h + include/libxml/schemasInternals.h + include/libxml/schematron.h + include/libxml/threads.h + include/libxml/tree.h + 
include/libxml/uri.h + include/libxml/valid.h + include/libxml/xinclude.h + include/libxml/xlink.h + include/libxml/xmlIO.h + include/libxml/xmlautomata.h + include/libxml/xmlerror.h + include/libxml/xmlexports.h + include/libxml/xmlmemory.h + include/libxml/xmlmodule.h + include/libxml/xmlreader.h + include/libxml/xmlregexp.h + include/libxml/xmlsave.h + include/libxml/xmlschemas.h + include/libxml/xmlschemastypes.h + include/libxml/xmlstring.h + include/libxml/xmlunicode.h + include/libxml/xmlwriter.h + include/libxml/xpath.h + include/libxml/xpathInternals.h + include/libxml/xpointer.h +) + +set( + LIBXML2_SRCS + buf.c + c14n.c + catalog.c + chvalid.c + debugXML.c + dict.c + encoding.c + entities.c + error.c + globals.c + hash.c + HTMLparser.c + HTMLtree.c + legacy.c + list.c + nanoftp.c + nanohttp.c + parser.c + parserInternals.c + pattern.c + relaxng.c + SAX.c + SAX2.c + schematron.c + threads.c + tree.c + uri.c + valid.c + xinclude.c + xlink.c + xmlIO.c + xmlmemory.c + xmlmodule.c + xmlreader.c + xmlregexp.c + xmlsave.c + xmlschemas.c + xmlschemastypes.c + xmlstring.c + xmlunicode.c + xmlwriter.c + xpath.c + xpointer.c + xzlib.c +) + +if(WIN32) + list(APPEND LIBXML2_SRCS win32/libxml2.rc) + file( + WRITE + ${CMAKE_CURRENT_BINARY_DIR}/rcVersion.h + "#define LIBXML_MAJOR_VERSION ${LIBXML_MAJOR_VERSION}\n" + "#define LIBXML_MINOR_VERSION ${LIBXML_MINOR_VERSION}\n" + "#define LIBXML_MICRO_VERSION ${LIBXML_MICRO_VERSION}\n" + "#define LIBXML_DOTTED_VERSION \"${VERSION}\"\n" + ) +endif() + +if(LIBXML2_WITH_SAX1) + list(APPEND LIBXML2_SRCS DOCBparser.c) +endif() + +if(LIBXML2_WITH_TRIO) + list(APPEND LIBXML2_SRCS trio.c triostr.c) +endif() + +add_library(LibXml2 ${LIBXML2_HDRS} ${LIBXML2_SRCS}) +add_library(LibXml2::LibXml2 ALIAS LibXml2) + +if(NOT BUILD_SHARED_LIBS) + target_compile_definitions(LibXml2 INTERFACE LIBXML_STATIC) + set(XML_CFLAGS "-DLIBXML_STATIC") +endif() + +if(LIBXML2_WITH_THREADS) + target_compile_definitions(LibXml2 PRIVATE _REENTRANT) + 
if(WIN32) + target_compile_definitions(LibXml2 PRIVATE HAVE_WIN32_THREADS) + endif() +endif() + +target_include_directories( + LibXml2 + PUBLIC + $ + $ + $/${CMAKE_INSTALL_INCLUDEDIR}/libxml2> +) + +if(HAVE_DLOPEN) + target_link_libraries(LibXml2 PRIVATE dl) + set(MODULE_PLATFORM_LIBS "-ldl") +endif() + +if(HAVE_SHLLOAD) + target_link_libraries(LibXml2 PRIVATE dld) + set(MODULE_PLATFORM_LIBS "-ldld") +endif() + +if(UNIX) + target_link_libraries(LibXml2 PRIVATE m) + set(M_LIBS "-lm") +endif() + +if(WIN32) + target_link_libraries(LibXml2 PRIVATE ws2_32) + set(WIN32_EXTRA_LIBADD "-lws2_32") +endif() + +if(LIBXML2_WITH_ICONV) + target_link_libraries(LibXml2 PUBLIC Iconv::Iconv) + if(NOT Iconv_IS_BUILT_IN) + set(ICONV_LIBS "-liconv") + endif() +endif() + +if(LIBXML2_WITH_ICU) + target_link_libraries(LibXml2 PRIVATE ICU::data ICU::i18n ICU::uc) + if(WIN32) + set(ICU_LIBS "-licudt -licuin -licuuc") + else() + set(ICU_LIBS "-licudata -licui18n -licuuc") + endif() +endif() + +if(LIBXML2_WITH_LZMA) + target_link_libraries(LibXml2 PRIVATE LibLZMA::LibLZMA) + set(LZMA_LIBS "-llzma") +endif() + +if(LIBXML2_WITH_THREADS) + target_link_libraries(LibXml2 PRIVATE Threads::Threads) + set(THREAD_LIBS ${CMAKE_THREAD_LIBS_INIT}) +endif() + +if(LIBXML2_WITH_ZLIB) + target_link_libraries(LibXml2 PRIVATE ZLIB::ZLIB) + set(Z_LIBS "-lz") +endif() + +set_target_properties( + LibXml2 + PROPERTIES + IMPORT_PREFIX lib + OUTPUT_NAME xml2 + POSITION_INDEPENDENT_CODE ON + PREFIX lib + VERSION ${PROJECT_VERSION} +) + +if(MSVC) + if(BUILD_SHARED_LIBS) + set_target_properties( + LibXml2 + PROPERTIES + DEBUG_POSTFIX d + ) + else() + set_target_properties( + LibXml2 + PROPERTIES + DEBUG_POSTFIX sd + MINSIZEREL_POSTFIX s + RELEASE_POSTFIX s + RELWITHDEBINFO_POSTFIX s + ) + endif() +endif() + +install(FILES ${LIBXML2_HDRS} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/libxml2/libxml COMPONENT development) + +install( + TARGETS LibXml2 + EXPORT LibXml2 + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT 
development + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT runtime NAMELINK_COMPONENT development + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT runtime +) + +if(MSVC AND BUILD_SHARED_LIBS) + install(FILES $ DESTINATION ${CMAKE_INSTALL_BINDIR} CONFIGURATIONS Debug RelWithDebInfo COMPONENT debug) +endif() + +if(LIBXML2_WITH_PROGRAMS) + set( + PROGRAMS + xmlcatalog + xmllint + ) + foreach(PROGRAM ${PROGRAMS}) + add_executable(${PROGRAM} ${PROGRAM}.c) + add_executable(LibXml2::${PROGRAM} ALIAS ${PROGRAM}) + target_link_libraries(${PROGRAM} LibXml2) + if(HAVE_LIBHISTORY) + target_link_libraries(${PROGRAM} history) + endif() + if(HAVE_LIBREADLINE) + target_link_libraries(${PROGRAM} readline) + endif() + install(TARGETS ${PROGRAM} EXPORT LibXml2 RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT programs) + endforeach() +endif() + +if(LIBXML2_WITH_TESTS) + enable_testing() + set( + TESTS + runxmlconf + runsuite + testapi + testAutomata + testC14N + testchar + testdict + testHTML + testModule + testlimits + testReader + testrecurse + testRegexp + testRelax + testSAX + testSchemas + testURI + testXPath + ) + foreach(TEST ${TESTS}) + add_executable(${TEST} ${TEST}.c) + target_link_libraries(${TEST} LibXml2) + endforeach() + if(EXISTS ${LIBXML2_XMLCONF_WORKING_DIR}/xmlconf/xmlconf.xml) + add_test(NAME runxmlconf COMMAND runxmlconf WORKING_DIRECTORY ${LIBXML2_XMLCONF_WORKING_DIR}) + endif() + if(NOT WIN32) + add_test(NAME testapi COMMAND testapi) + endif() + add_test(NAME testchar COMMAND testchar) + add_test(NAME testdict COMMAND testdict) + add_test(NAME testrecurse COMMAND testrecurse WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) + if(Threads_FOUND) + set( + TESTS_THREADS + runtest + testThreads + ) + foreach(TEST ${TESTS_THREADS}) + add_executable(${TEST} ${TEST}.c) + if(WIN32) + target_compile_definitions(${TEST} PRIVATE HAVE_WIN32_THREADS) + endif() + target_link_libraries(${TEST} LibXml2 Threads::Threads) + endforeach() + add_test(NAME runtest 
COMMAND runtest --out ${CMAKE_CURRENT_BINARY_DIR} WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) + add_test(NAME testThreads COMMAND testThreads WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) + endif() +endif() + +if(LIBXML2_WITH_PYTHON) + execute_process( + COMMAND + ${Python_EXECUTABLE} + ${CMAKE_CURRENT_SOURCE_DIR}/python/generator.py + ${CMAKE_CURRENT_SOURCE_DIR}/doc/libxml2-api.xml + ${CMAKE_CURRENT_SOURCE_DIR}/python/libxml2-python-api.xml + WORKING_DIRECTORY + ${CMAKE_CURRENT_BINARY_DIR} + ) + file(READ python/libxml.py LIBXML_PY) + file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/libxml2.py.in "${LIBXML_PY}") + file(READ ${CMAKE_CURRENT_BINARY_DIR}/libxml2class.py LIBXML2CLASS_PY) + file(APPEND ${CMAKE_CURRENT_BINARY_DIR}/libxml2.py.in "${LIBXML2CLASS_PY}") + configure_file(${CMAKE_CURRENT_BINARY_DIR}/libxml2.py.in libxml2.py COPYONLY) + add_library( + LibXml2Mod + libxml2-py.c + libxml2-py.h + python/libxml.c + python/libxml_wrap.h + python/types.c + ) + target_include_directories( + LibXml2Mod + PUBLIC + $ + ) + target_link_libraries(LibXml2Mod LibXml2 Python::Python) + set_target_properties( + LibXml2Mod + PROPERTIES + IMPORT_PREFIX lib + OUTPUT_NAME xml2mod + PREFIX lib + VERSION ${PROJECT_VERSION} + ) + install( + TARGETS LibXml2Mod + ARCHIVE DESTINATION ${LIBXML2_PYTHON_INSTALL_DIR} COMPONENT development + LIBRARY DESTINATION ${LIBXML2_PYTHON_INSTALL_DIR} COMPONENT runtime NAMELINK_COMPONENT development + RUNTIME DESTINATION ${LIBXML2_PYTHON_INSTALL_DIR} COMPONENT runtime + ) + if(MSVC AND BUILD_SHARED_LIBS) + install(FILES $ DESTINATION ${LIBXML2_PYTHON_INSTALL_DIR} CONFIGURATIONS Debug RelWithDebInfo COMPONENT debug) + endif() + install(FILES python/drv_libxml2.py DESTINATION ${LIBXML2_PYTHON_INSTALL_DIR} COMPONENT runtime) + install(FILES ${CMAKE_CURRENT_BINARY_DIR}/libxml2.py DESTINATION ${LIBXML2_PYTHON_INSTALL_DIR} COMPONENT runtime) +endif() + +install(FILES libxml.3 DESTINATION ${CMAKE_INSTALL_MANDIR}/man3 COMPONENT documentation) +install(FILES 
doc/xmlcatalog.1 DESTINATION ${CMAKE_INSTALL_MANDIR}/man1 COMPONENT documentation) +install(FILES doc/xmllint.1 DESTINATION ${CMAKE_INSTALL_MANDIR}/man1 COMPONENT documentation) +install(DIRECTORY doc/ DESTINATION ${CMAKE_INSTALL_DATADIR}/doc/libxml2 COMPONENT documentation PATTERN Makefile.* EXCLUDE) + +configure_package_config_file( + libxml2-config.cmake.cmake.in libxml2-config.cmake + INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/libxml2-${PROJECT_VERSION} +) + +install( + FILES ${CMAKE_CURRENT_BINARY_DIR}/libxml2-config.cmake + DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/libxml2-${PROJECT_VERSION} + COMPONENT development +) + +write_basic_package_version_file( + ${CMAKE_CURRENT_BINARY_DIR}/libxml2-config-version.cmake + VERSION ${PROJECT_VERSION} + COMPATIBILITY ExactVersion +) + +install( + FILES ${CMAKE_CURRENT_BINARY_DIR}/libxml2-config-version.cmake + DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/libxml2-${PROJECT_VERSION} + COMPONENT development +) + +install( + EXPORT LibXml2 + DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/libxml2-${PROJECT_VERSION} + NAMESPACE LibXml2:: + FILE libxml2-export.cmake + COMPONENT development +) + +configure_file(include/libxml/xmlversion.h.in libxml/xmlversion.h) +install(FILES ${CMAKE_CURRENT_BINARY_DIR}/libxml/xmlversion.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/libxml2/libxml COMPONENT development) + +if(MSVC) + configure_file(include/libxml/xmlwin32version.h.in libxml/xmlwin32version.h) + install(FILES ${CMAKE_CURRENT_BINARY_DIR}/libxml/xmlwin32version.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/libxml2/libxml COMPONENT development) +endif() + +if(LIBXML2_WITH_PYTHON) + set(prefix "${CMAKE_INSTALL_PREFIX}") + configure_file(python/setup.py.in setup.py @ONLY) +endif() + +set(XML_INCLUDEDIR "-I\${includedir}/libxml2") +set(XML_LIBDIR "-L\${libdir}") +set(XML_LIBS "-lxml2") +set(XML_PRIVATE_LIBS "${Z_LIBS} ${LZMA_LIBS} ${THREAD_LIBS} ${ICONV_LIBS} ${ICU_LIBS} ${M_LIBS}") + +file(RELATIVE_PATH PACKAGE_RELATIVE_PATH 
"${CMAKE_INSTALL_FULL_LIBDIR}/pkgconfig" "${CMAKE_INSTALL_PREFIX}") +string(REGEX REPLACE "/$" "" PACKAGE_RELATIVE_PATH "${PACKAGE_RELATIVE_PATH}") + +set(prefix "\${pcfiledir}/${PACKAGE_RELATIVE_PATH}") +set(exec_prefix "\${prefix}") +set(libdir "\${prefix}/${CMAKE_INSTALL_LIBDIR}") +set(includedir "\${prefix}/${CMAKE_INSTALL_INCLUDEDIR}") +configure_file(libxml-2.0.pc.in libxml-2.0.pc @ONLY) +install(FILES ${CMAKE_CURRENT_BINARY_DIR}/libxml-2.0.pc DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig COMPONENT development) + +set(prefix "\$(cd \"\$(dirname \"\$0\")\"; pwd -P)/..") +configure_file(xml2-config.in xml2-config @ONLY) +install(PROGRAMS ${CMAKE_CURRENT_BINARY_DIR}/xml2-config DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT development) + +set(XML_INCLUDEDIR "-I${CMAKE_INSTALL_FULL_INCLUDEDIR}/libxml2") +set(XML_LIBDIR "-L${CMAKE_INSTALL_FULL_LIBDIR}") +configure_file(xml2Conf.sh.in xml2Conf.sh @ONLY) +install(FILES ${CMAKE_CURRENT_BINARY_DIR}/xml2Conf.sh DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT development) + +set(CPACK_COMPONENT_DEVELOPMENT_DEPENDS runtime) +set(CPACK_COMPONENT_PROGRAMS_DEPENDS runtime) +set(CPACK_DEB_COMPONENT_INSTALL ON) +set(CPACK_DEBIAN_DEVELOPMENT_PACKAGE_DEPENDS "${PACKAGE_TARNAME}") +set(CPACK_DEBIAN_DEVELOPMENT_PACKAGE_NAME "${PACKAGE_TARNAME}-dev") +set(CPACK_DEBIAN_DEVELOPMENT_PACKAGE_SECTION "libdevel") +set(CPACK_DEBIAN_PACKAGE_HOMEPAGE ${PACKAGE_URL}) +set(CPACK_DEBIAN_PACKAGE_NAME ${PACKAGE_TARNAME}) +set(CPACK_DEBIAN_PACKAGE_SECTION "devel") +set(CPACK_DEBIAN_PROGRAMS_PACKAGE_DEPENDS "${PACKAGE_TARNAME}") +set(CPACK_DEBIAN_PROGRAMS_PACKAGE_NAME "${PACKAGE_TARNAME}-utils") +set(CPACK_DEBIAN_PROGRAMS_PACKAGE_SECTION "utils") +set(CPACK_DEBIAN_RUNTIME_PACKAGE_NAME ${PACKAGE_TARNAME}) +set(CPACK_DEBIAN_RUNTIME_PACKAGE_RECOMMENDS "${PACKAGE_TARNAME}-utils") +set(CPACK_DEBIAN_RUNTIME_PACKAGE_SECTION "libs") +set(CPACK_NSIS_PACKAGE_NAME ${PACKAGE_STRING}) +set(CPACK_NSIS_URL_INFO_ABOUT ${PACKAGE_URL}) +set(CPACK_PACKAGE_CONTACT 
${PACKAGE_BUGREPORT}) +set(CPACK_PACKAGE_DISPLAY_NAME ${PACKAGE_STRING}) +set(CPACK_PACKAGE_INSTALL_DIRECTORY "${PACKAGE_TARNAME}-${PACKAGE_VERSION}") +set(CPACK_PACKAGE_NAME ${PACKAGE_TARNAME}) +set(CPACK_PACKAGE_VERSION ${PACKAGE_VERSION}) +set(CPACK_PACKAGE_VERSION_MAJOR ${LIBXML_MAJOR_VERSION}) +set(CPACK_PACKAGE_VERSION_MINOR ${LIBXML_MINOR_VERSION}) +set(CPACK_PACKAGE_VERSION_PATCH ${LIBXML_MICRO_VERSION}) +set(CPACK_RESOURCE_FILE_LICENSE ${CMAKE_CURRENT_SOURCE_DIR}/Copyright) +set(CPACK_RPM_COMPONENT_INSTALL ON) +set(CPACK_RPM_development_PACKAGE_NAME "${PACKAGE_NAME}-devel") +set(CPACK_RPM_development_PACKAGE_REQUIRES "${PACKAGE_NAME}") +set(CPACK_RPM_PACKAGE_GROUP "Development/Libraries") +set(CPACK_RPM_PACKAGE_NAME ${PACKAGE_TARNAME}) +set(CPACK_RPM_PACKAGE_URL ${PACKAGE_URL}) +set(CPACK_RPM_programs_PACKAGE_NAME "${PACKAGE_NAME}-utils") +set(CPACK_RPM_programs_PACKAGE_REQUIRES "${PACKAGE_NAME}") +set(CPACK_RPM_runtime_PACKAGE_NAME "${PACKAGE_NAME}") +set(CPACK_RPM_runtime_PACKAGE_SUGGESTS "${PACKAGE_NAME}-utils") + +include(CPack) diff --git a/third_party/libxml/src/COPYING b/third_party/libxml/src/COPYING deleted file mode 100644 index d61318502caf..000000000000 --- a/third_party/libxml/src/COPYING +++ /dev/null @@ -1,23 +0,0 @@ -Except where otherwise noted in the source code (e.g. the files hash.c, -list.c and the trio files, which are covered by a similar licence but -with different Copyright notices) all the files are: - - Copyright (C) 1998-2012 Daniel Veillard. All Rights Reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is fur- -nished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FIT- -NESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
diff --git a/third_party/libxml/src/HTMLparser.c b/third_party/libxml/src/HTMLparser.c index 1312ddefb66f..f74213419f9f 100644 --- a/third_party/libxml/src/HTMLparser.c +++ b/third_party/libxml/src/HTMLparser.c @@ -299,7 +299,7 @@ htmlNodeInfoPop(htmlParserCtxtPtr ctxt) #define UPPER (toupper(*ctxt->input->cur)) -#define SKIP(val) ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val) +#define SKIP(val) ctxt->input->cur += (val),ctxt->input->col+=(val) #define NXT(val) ctxt->input->cur[(val)] @@ -333,7 +333,7 @@ htmlNodeInfoPop(htmlParserCtxtPtr ctxt) if (*(ctxt->input->cur) == '\n') { \ ctxt->input->line++; ctxt->input->col = 1; \ } else ctxt->input->col++; \ - ctxt->token = 0; ctxt->input->cur += l; ctxt->nbChars++; \ + ctxt->token = 0; ctxt->input->cur += l; \ } while (0) /************ @@ -417,6 +417,10 @@ htmlFindEncoding(xmlParserCtxtPtr ctxt) { static int htmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) { + const unsigned char *cur; + unsigned char c; + unsigned int val; + if (ctxt->instate == XML_PARSER_EOF) return(0); @@ -424,99 +428,29 @@ htmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) { *len = 0; return(ctxt->token); } - if (ctxt->charset == XML_CHAR_ENCODING_UTF8) { - /* - * We are supposed to handle UTF8, check it's valid - * From rfc2044: encoding of the Unicode values on UTF-8: - * - * UCS-4 range (hex.) 
UTF-8 octet sequence (binary) - * 0000 0000-0000 007F 0xxxxxxx - * 0000 0080-0000 07FF 110xxxxx 10xxxxxx - * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx - * - * Check for the 0x110000 limit too - */ - const unsigned char *cur = ctxt->input->cur; - unsigned char c; - unsigned int val; - - c = *cur; - if (c & 0x80) { - if (cur[1] == 0) { - xmlParserInputGrow(ctxt->input, INPUT_CHUNK); - cur = ctxt->input->cur; - } - if ((cur[1] & 0xc0) != 0x80) - goto encoding_error; - if ((c & 0xe0) == 0xe0) { + if (ctxt->charset != XML_CHAR_ENCODING_UTF8) { + xmlChar * guess; + xmlCharEncodingHandlerPtr handler; - if (cur[2] == 0) { - xmlParserInputGrow(ctxt->input, INPUT_CHUNK); - cur = ctxt->input->cur; - } - if ((cur[2] & 0xc0) != 0x80) - goto encoding_error; - if ((c & 0xf0) == 0xf0) { - if (cur[3] == 0) { - xmlParserInputGrow(ctxt->input, INPUT_CHUNK); - cur = ctxt->input->cur; - } - if (((c & 0xf8) != 0xf0) || - ((cur[3] & 0xc0) != 0x80)) - goto encoding_error; - /* 4-byte code */ - *len = 4; - val = (cur[0] & 0x7) << 18; - val |= (cur[1] & 0x3f) << 12; - val |= (cur[2] & 0x3f) << 6; - val |= cur[3] & 0x3f; - } else { - /* 3-byte code */ - *len = 3; - val = (cur[0] & 0xf) << 12; - val |= (cur[1] & 0x3f) << 6; - val |= cur[2] & 0x3f; - } - } else { - /* 2-byte code */ - *len = 2; - val = (cur[0] & 0x1f) << 6; - val |= cur[1] & 0x3f; - } - if (!IS_CHAR(val)) { - htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR, - "Char 0x%X out of allowed range\n", val); - } - return(val); - } else { + /* + * Assume it's a fixed length encoding (1) with + * a compatible encoding for the ASCII set, since + * HTML constructs only use < 128 chars + */ + if ((int) *ctxt->input->cur < 0x80) { + *len = 1; if ((*ctxt->input->cur == 0) && (ctxt->input->cur < ctxt->input->end)) { - htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR, - "Char 0x%X out of allowed range\n", 0); - *len = 1; + htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR, + "Char 0x%X out of allowed range\n", 0); return(' '); } - /* 1-byte code */ - *len 
= 1; - return((int) *ctxt->input->cur); - } - } - /* - * Assume it's a fixed length encoding (1) with - * a compatible encoding for the ASCII set, since - * XML constructs only use < 128 chars - */ - *len = 1; - if ((int) *ctxt->input->cur < 0x80) - return((int) *ctxt->input->cur); - - /* - * Humm this is bad, do an automatic flow conversion - */ - { - xmlChar * guess; - xmlCharEncodingHandlerPtr handler; + return((int) *ctxt->input->cur); + } + /* + * Humm this is bad, do an automatic flow conversion + */ guess = htmlFindEncoding(ctxt); if (guess == NULL) { xmlSwitchEncoding(ctxt, XML_CHAR_ENCODING_8859_1); @@ -526,7 +460,12 @@ htmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) { ctxt->input->encoding = guess; handler = xmlFindCharEncodingHandler((const char *) guess); if (handler != NULL) { - xmlSwitchToEncoding(ctxt, handler); + /* + * Don't use UTF-8 encoder which isn't required and + * can produce invalid UTF-8. + */ + if (!xmlStrEqual(BAD_CAST handler->name, BAD_CAST "UTF-8")) + xmlSwitchToEncoding(ctxt, handler); } else { htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING, "Unsupported encoding %s", guess, NULL); @@ -535,7 +474,86 @@ htmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) { ctxt->charset = XML_CHAR_ENCODING_UTF8; } - return(xmlCurrentChar(ctxt, len)); + /* + * We are supposed to handle UTF8, check it's valid + * From rfc2044: encoding of the Unicode values on UTF-8: + * + * UCS-4 range (hex.) 
UTF-8 octet sequence (binary) + * 0000 0000-0000 007F 0xxxxxxx + * 0000 0080-0000 07FF 110xxxxx 10xxxxxx + * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx + * + * Check for the 0x110000 limit too + */ + cur = ctxt->input->cur; + c = *cur; + if (c & 0x80) { + if ((c & 0x40) == 0) + goto encoding_error; + if (cur[1] == 0) { + xmlParserInputGrow(ctxt->input, INPUT_CHUNK); + cur = ctxt->input->cur; + } + if ((cur[1] & 0xc0) != 0x80) + goto encoding_error; + if ((c & 0xe0) == 0xe0) { + + if (cur[2] == 0) { + xmlParserInputGrow(ctxt->input, INPUT_CHUNK); + cur = ctxt->input->cur; + } + if ((cur[2] & 0xc0) != 0x80) + goto encoding_error; + if ((c & 0xf0) == 0xf0) { + if (cur[3] == 0) { + xmlParserInputGrow(ctxt->input, INPUT_CHUNK); + cur = ctxt->input->cur; + } + if (((c & 0xf8) != 0xf0) || + ((cur[3] & 0xc0) != 0x80)) + goto encoding_error; + /* 4-byte code */ + *len = 4; + val = (cur[0] & 0x7) << 18; + val |= (cur[1] & 0x3f) << 12; + val |= (cur[2] & 0x3f) << 6; + val |= cur[3] & 0x3f; + if (val < 0x10000) + goto encoding_error; + } else { + /* 3-byte code */ + *len = 3; + val = (cur[0] & 0xf) << 12; + val |= (cur[1] & 0x3f) << 6; + val |= cur[2] & 0x3f; + if (val < 0x800) + goto encoding_error; + } + } else { + /* 2-byte code */ + *len = 2; + val = (cur[0] & 0x1f) << 6; + val |= cur[1] & 0x3f; + if (val < 0x80) + goto encoding_error; + } + if (!IS_CHAR(val)) { + htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR, + "Char 0x%X out of allowed range\n", val); + } + return(val); + } else { + if ((*ctxt->input->cur == 0) && + (ctxt->input->cur < ctxt->input->end)) { + htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR, + "Char 0x%X out of allowed range\n", 0); + *len = 1; + return(' '); + } + /* 1-byte code */ + *len = 1; + return((int) *ctxt->input->cur); + } encoding_error: /* @@ -560,7 +578,16 @@ htmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) { BAD_CAST buffer, NULL); } - ctxt->charset = XML_CHAR_ENCODING_8859_1; + /* + * Don't switch encodings twice. 
Note that if there's an encoder, we + * shouldn't receive invalid UTF-8 anyway. + * + * Note that if ctxt->input->buf == NULL, switching encodings is + * impossible, see Gitlab issue #34. + */ + if ((ctxt->input->buf != NULL) && + (ctxt->input->buf->encoder == NULL)) + xmlSwitchEncoding(ctxt, XML_CHAR_ENCODING_8859_1); *len = 1; return((int) *ctxt->input->cur); } @@ -587,7 +614,6 @@ htmlSkipBlankChars(xmlParserCtxtPtr ctxt) { ctxt->input->line++; ctxt->input->col = 1; } else ctxt->input->col++; ctxt->input->cur++; - ctxt->nbChars++; if (*ctxt->input->cur == 0) xmlParserInputGrow(ctxt->input, INPUT_CHUNK); } @@ -1049,102 +1075,266 @@ html40ElementTable[] = { } }; +typedef struct { + const char *oldTag; + const char *newTag; +} htmlStartCloseEntry; + /* * start tags that imply the end of current element */ -static const char * const htmlStartClose[] = { -"form", "form", "p", "hr", "h1", "h2", "h3", "h4", "h5", "h6", - "dl", "ul", "ol", "menu", "dir", "address", "pre", - "listing", "xmp", "head", NULL, -"head", "p", NULL, -"title", "p", NULL, -"body", "head", "style", "link", "title", "p", NULL, -"frameset", "head", "style", "link", "title", "p", NULL, -"li", "p", "h1", "h2", "h3", "h4", "h5", "h6", "dl", "address", - "pre", "listing", "xmp", "head", "li", NULL, -"hr", "p", "head", NULL, -"h1", "p", "head", NULL, -"h2", "p", "head", NULL, -"h3", "p", "head", NULL, -"h4", "p", "head", NULL, -"h5", "p", "head", NULL, -"h6", "p", "head", NULL, -"dir", "p", "head", NULL, -"address", "p", "head", "ul", NULL, -"pre", "p", "head", "ul", NULL, -"listing", "p", "head", NULL, -"xmp", "p", "head", NULL, -"blockquote", "p", "head", NULL, -"dl", "p", "dt", "menu", "dir", "address", "pre", "listing", - "xmp", "head", NULL, -"dt", "p", "menu", "dir", "address", "pre", "listing", "xmp", - "head", "dd", NULL, -"dd", "p", "menu", "dir", "address", "pre", "listing", "xmp", - "head", "dt", NULL, -"ul", "p", "head", "ol", "menu", "dir", "address", "pre", - "listing", "xmp", NULL, -"ol", 
"p", "head", "ul", NULL, -"menu", "p", "head", "ul", NULL, -"p", "p", "head", "h1", "h2", "h3", "h4", "h5", "h6", FONTSTYLE, NULL, -"div", "p", "head", NULL, -"noscript", "script", NULL, -"center", "font", "b", "i", "p", "head", NULL, -"a", "a", "head", NULL, -"caption", "p", NULL, -"colgroup", "caption", "colgroup", "col", "p", NULL, -"col", "caption", "col", "p", NULL, -"table", "p", "head", "h1", "h2", "h3", "h4", "h5", "h6", "pre", - "listing", "xmp", "a", NULL, -"th", "th", "td", "p", "span", "font", "a", "b", "i", "u", NULL, -"td", "th", "td", "p", "span", "font", "a", "b", "i", "u", NULL, -"tr", "th", "td", "tr", "caption", "col", "colgroup", "p", NULL, -"thead", "caption", "col", "colgroup", NULL, -"tfoot", "th", "td", "tr", "caption", "col", "colgroup", "thead", - "tbody", "p", NULL, -"tbody", "th", "td", "tr", "caption", "col", "colgroup", "thead", - "tfoot", "tbody", "p", NULL, -"optgroup", "option", NULL, -"option", "option", NULL, -"fieldset", "legend", "p", "head", "h1", "h2", "h3", "h4", "h5", "h6", - "pre", "listing", "xmp", "a", NULL, -/* most tags in in FONTSTYLE, PHRASE and SPECIAL should close */ -"tt", "head", NULL, -"i", "head", NULL, -"b", "head", NULL, -"u", "head", NULL, -"s", "head", NULL, -"strike", "head", NULL, -"big", "head", NULL, -"small", "head", NULL, - -"em", "head", NULL, -"strong", "head", NULL, -"dfn", "head", NULL, -"code", "head", NULL, -"samp", "head", NULL, -"kbd", "head", NULL, -"var", "head", NULL, -"cite", "head", NULL, -"abbr", "head", NULL, -"acronym", "head", NULL, - -/* "a" */ -"img", "head", NULL, -/* "applet" */ -/* "embed" */ -/* "object" */ -"font", "head", NULL, -/* "basefont" */ -"br", "head", NULL, -/* "script" */ -"map", "head", NULL, -"q", "head", NULL, -"sub", "head", NULL, -"sup", "head", NULL, -"span", "head", NULL, -"bdo", "head", NULL, -"iframe", "head", NULL, -NULL +static const htmlStartCloseEntry htmlStartClose[] = { + { "a", "a" }, + { "a", "fieldset" }, + { "a", "table" }, + { "a", "td" }, + { "a", 
"th" }, + { "address", "dd" }, + { "address", "dl" }, + { "address", "dt" }, + { "address", "form" }, + { "address", "li" }, + { "address", "ul" }, + { "b", "center" }, + { "b", "p" }, + { "b", "td" }, + { "b", "th" }, + { "big", "p" }, + { "caption", "col" }, + { "caption", "colgroup" }, + { "caption", "tbody" }, + { "caption", "tfoot" }, + { "caption", "thead" }, + { "caption", "tr" }, + { "col", "col" }, + { "col", "colgroup" }, + { "col", "tbody" }, + { "col", "tfoot" }, + { "col", "thead" }, + { "col", "tr" }, + { "colgroup", "colgroup" }, + { "colgroup", "tbody" }, + { "colgroup", "tfoot" }, + { "colgroup", "thead" }, + { "colgroup", "tr" }, + { "dd", "dt" }, + { "dir", "dd" }, + { "dir", "dl" }, + { "dir", "dt" }, + { "dir", "form" }, + { "dir", "ul" }, + { "dl", "form" }, + { "dl", "li" }, + { "dt", "dd" }, + { "dt", "dl" }, + { "font", "center" }, + { "font", "td" }, + { "font", "th" }, + { "form", "form" }, + { "h1", "fieldset" }, + { "h1", "form" }, + { "h1", "li" }, + { "h1", "p" }, + { "h1", "table" }, + { "h2", "fieldset" }, + { "h2", "form" }, + { "h2", "li" }, + { "h2", "p" }, + { "h2", "table" }, + { "h3", "fieldset" }, + { "h3", "form" }, + { "h3", "li" }, + { "h3", "p" }, + { "h3", "table" }, + { "h4", "fieldset" }, + { "h4", "form" }, + { "h4", "li" }, + { "h4", "p" }, + { "h4", "table" }, + { "h5", "fieldset" }, + { "h5", "form" }, + { "h5", "li" }, + { "h5", "p" }, + { "h5", "table" }, + { "h6", "fieldset" }, + { "h6", "form" }, + { "h6", "li" }, + { "h6", "p" }, + { "h6", "table" }, + { "head", "a" }, + { "head", "abbr" }, + { "head", "acronym" }, + { "head", "address" }, + { "head", "b" }, + { "head", "bdo" }, + { "head", "big" }, + { "head", "blockquote" }, + { "head", "body" }, + { "head", "br" }, + { "head", "center" }, + { "head", "cite" }, + { "head", "code" }, + { "head", "dd" }, + { "head", "dfn" }, + { "head", "dir" }, + { "head", "div" }, + { "head", "dl" }, + { "head", "dt" }, + { "head", "em" }, + { "head", "fieldset" }, + { 
"head", "font" }, + { "head", "form" }, + { "head", "frameset" }, + { "head", "h1" }, + { "head", "h2" }, + { "head", "h3" }, + { "head", "h4" }, + { "head", "h5" }, + { "head", "h6" }, + { "head", "hr" }, + { "head", "i" }, + { "head", "iframe" }, + { "head", "img" }, + { "head", "kbd" }, + { "head", "li" }, + { "head", "listing" }, + { "head", "map" }, + { "head", "menu" }, + { "head", "ol" }, + { "head", "p" }, + { "head", "pre" }, + { "head", "q" }, + { "head", "s" }, + { "head", "samp" }, + { "head", "small" }, + { "head", "span" }, + { "head", "strike" }, + { "head", "strong" }, + { "head", "sub" }, + { "head", "sup" }, + { "head", "table" }, + { "head", "tt" }, + { "head", "u" }, + { "head", "ul" }, + { "head", "var" }, + { "head", "xmp" }, + { "hr", "form" }, + { "i", "center" }, + { "i", "p" }, + { "i", "td" }, + { "i", "th" }, + { "legend", "fieldset" }, + { "li", "li" }, + { "link", "body" }, + { "link", "frameset" }, + { "listing", "dd" }, + { "listing", "dl" }, + { "listing", "dt" }, + { "listing", "fieldset" }, + { "listing", "form" }, + { "listing", "li" }, + { "listing", "table" }, + { "listing", "ul" }, + { "menu", "dd" }, + { "menu", "dl" }, + { "menu", "dt" }, + { "menu", "form" }, + { "menu", "ul" }, + { "ol", "form" }, + { "ol", "ul" }, + { "option", "optgroup" }, + { "option", "option" }, + { "p", "address" }, + { "p", "blockquote" }, + { "p", "body" }, + { "p", "caption" }, + { "p", "center" }, + { "p", "col" }, + { "p", "colgroup" }, + { "p", "dd" }, + { "p", "dir" }, + { "p", "div" }, + { "p", "dl" }, + { "p", "dt" }, + { "p", "fieldset" }, + { "p", "form" }, + { "p", "frameset" }, + { "p", "h1" }, + { "p", "h2" }, + { "p", "h3" }, + { "p", "h4" }, + { "p", "h5" }, + { "p", "h6" }, + { "p", "head" }, + { "p", "hr" }, + { "p", "li" }, + { "p", "listing" }, + { "p", "menu" }, + { "p", "ol" }, + { "p", "p" }, + { "p", "pre" }, + { "p", "table" }, + { "p", "tbody" }, + { "p", "td" }, + { "p", "tfoot" }, + { "p", "th" }, + { "p", "title" }, + { 
"p", "tr" }, + { "p", "ul" }, + { "p", "xmp" }, + { "pre", "dd" }, + { "pre", "dl" }, + { "pre", "dt" }, + { "pre", "fieldset" }, + { "pre", "form" }, + { "pre", "li" }, + { "pre", "table" }, + { "pre", "ul" }, + { "s", "p" }, + { "script", "noscript" }, + { "small", "p" }, + { "span", "td" }, + { "span", "th" }, + { "strike", "p" }, + { "style", "body" }, + { "style", "frameset" }, + { "tbody", "tbody" }, + { "tbody", "tfoot" }, + { "td", "tbody" }, + { "td", "td" }, + { "td", "tfoot" }, + { "td", "th" }, + { "td", "tr" }, + { "tfoot", "tbody" }, + { "th", "tbody" }, + { "th", "td" }, + { "th", "tfoot" }, + { "th", "th" }, + { "th", "tr" }, + { "thead", "tbody" }, + { "thead", "tfoot" }, + { "title", "body" }, + { "title", "frameset" }, + { "tr", "tbody" }, + { "tr", "tfoot" }, + { "tr", "tr" }, + { "tt", "p" }, + { "u", "p" }, + { "u", "td" }, + { "u", "th" }, + { "ul", "address" }, + { "ul", "form" }, + { "ul", "menu" }, + { "ul", "ol" }, + { "ul", "pre" }, + { "xmp", "dd" }, + { "xmp", "dl" }, + { "xmp", "dt" }, + { "xmp", "fieldset" }, + { "xmp", "form" }, + { "xmp", "li" }, + { "xmp", "table" }, + { "xmp", "ul" } }; /* @@ -1214,9 +1404,6 @@ static const elementPriority htmlEndPriority[] = { {NULL, 100} /* Default priority */ }; -static const char** htmlStartCloseIndex[100]; -static int htmlStartCloseIndexinitialized = 0; - /************************************************************************ * * * functions to handle HTML specific data * @@ -1226,24 +1413,18 @@ static int htmlStartCloseIndexinitialized = 0; /** * htmlInitAutoClose: * - * Initialize the htmlStartCloseIndex for fast lookup of closing tags names. - * This is not reentrant. Call xmlInitParser() once before processing in - * case of use in multithreaded programs. + * This is a no-op now. 
*/ void htmlInitAutoClose(void) { - int indx, i = 0; +} - if (htmlStartCloseIndexinitialized) return; +static int +htmlCompareTags(const void *key, const void *member) { + const xmlChar *tag = (const xmlChar *) key; + const htmlElemDesc *desc = (const htmlElemDesc *) member; - for (indx = 0;indx < 100;indx ++) htmlStartCloseIndex[indx] = NULL; - indx = 0; - while ((htmlStartClose[i] != NULL) && (indx < 100 - 1)) { - htmlStartCloseIndex[indx++] = (const char**) &htmlStartClose[i]; - while (htmlStartClose[i] != NULL) i++; - i++; - } - htmlStartCloseIndexinitialized = 1; + return(xmlStrcasecmp(tag, BAD_CAST desc->name)); } /** @@ -1256,14 +1437,12 @@ htmlInitAutoClose(void) { */ const htmlElemDesc * htmlTagLookup(const xmlChar *tag) { - unsigned int i; + if (tag == NULL) + return(NULL); - for (i = 0; i < (sizeof(html40ElementTable) / - sizeof(html40ElementTable[0]));i++) { - if (!xmlStrcasecmp(tag, BAD_CAST html40ElementTable[i].name)) - return((htmlElemDescPtr) &html40ElementTable[i]); - } - return(NULL); + return((const htmlElemDesc *) bsearch(tag, html40ElementTable, + sizeof(html40ElementTable) / sizeof(htmlElemDesc), + sizeof(htmlElemDesc), htmlCompareTags)); } /** @@ -1284,6 +1463,19 @@ htmlGetEndPriority (const xmlChar *name) { } +static int +htmlCompareStartClose(const void *vkey, const void *member) { + const htmlStartCloseEntry *key = (const htmlStartCloseEntry *) vkey; + const htmlStartCloseEntry *entry = (const htmlStartCloseEntry *) member; + int ret; + + ret = strcmp(key->oldTag, entry->oldTag); + if (ret == 0) + ret = strcmp(key->newTag, entry->newTag); + + return(ret); +} + /** * htmlCheckAutoClose: * @newtag: The new tag name @@ -1291,37 +1483,21 @@ htmlGetEndPriority (const xmlChar *name) { * * Checks whether the new tag is one of the registered valid tags for * closing old. - * Initialize the htmlStartCloseIndex for fast lookup of closing tags names. * * Returns 0 if no, 1 if yes. 
*/ static int htmlCheckAutoClose(const xmlChar * newtag, const xmlChar * oldtag) { - int i, indx; - const char **closed = NULL; - - if (htmlStartCloseIndexinitialized == 0) - htmlInitAutoClose(); - - /* inefficient, but not a big deal */ - for (indx = 0; indx < 100; indx++) { - closed = htmlStartCloseIndex[indx]; - if (closed == NULL) - return (0); - if (xmlStrEqual(BAD_CAST * closed, newtag)) - break; - } - - i = closed - htmlStartClose; - i++; - while (htmlStartClose[i] != NULL) { - if (xmlStrEqual(BAD_CAST htmlStartClose[i], oldtag)) { - return (1); - } - i++; - } - return (0); + htmlStartCloseEntry key; + void *res; + + key.oldTag = (const char *) oldtag; + key.newTag = (const char *) newtag; + res = bsearch(&key, htmlStartClose, + sizeof(htmlStartClose) / sizeof(htmlStartCloseEntry), + sizeof(htmlStartCloseEntry), htmlCompareStartClose); + return(res != NULL); } /** @@ -2489,7 +2665,6 @@ htmlParseName(htmlParserCtxtPtr ctxt) { count = in - ctxt->input->cur; ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count); ctxt->input->cur = in; - ctxt->nbChars += count; ctxt->input->col += count; return(ret); } @@ -2796,47 +2971,39 @@ htmlParseAttValue(htmlParserCtxtPtr ctxt) { static xmlChar * htmlParseSystemLiteral(htmlParserCtxtPtr ctxt) { size_t len = 0, startPosition = 0; + int err = 0; + int quote; xmlChar *ret = NULL; - if (CUR == '"') { - NEXT; + if ((CUR != '"') && (CUR != '\'')) { + htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, + "SystemLiteral \" or ' expected\n", NULL, NULL); + return(NULL); + } + quote = CUR; + NEXT; - if (CUR_PTR < BASE_PTR) - return(ret); - startPosition = CUR_PTR - BASE_PTR; + if (CUR_PTR < BASE_PTR) + return(ret); + startPosition = CUR_PTR - BASE_PTR; - while ((IS_CHAR_CH(CUR)) && (CUR != '"')) { - NEXT; - len++; - } - if (!IS_CHAR_CH(CUR)) { - htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, - "Unfinished SystemLiteral\n", NULL, NULL); - } else { - ret = xmlStrndup((BASE_PTR+startPosition), len); - NEXT; + while ((CUR != 0) && (CUR 
!= quote)) { + /* TODO: Handle UTF-8 */ + if (!IS_CHAR_CH(CUR)) { + htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR, + "Invalid char in SystemLiteral 0x%X\n", CUR); + err = 1; } - } else if (CUR == '\'') { NEXT; - - if (CUR_PTR < BASE_PTR) - return(ret); - startPosition = CUR_PTR - BASE_PTR; - - while ((IS_CHAR_CH(CUR)) && (CUR != '\'')) { - NEXT; - len++; - } - if (!IS_CHAR_CH(CUR)) { - htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, - "Unfinished SystemLiteral\n", NULL, NULL); - } else { - ret = xmlStrndup((BASE_PTR+startPosition), len); - NEXT; - } + len++; + } + if (CUR != quote) { + htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, + "Unfinished SystemLiteral\n", NULL, NULL); } else { - htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, - " or ' expected\n", NULL, NULL); + NEXT; + if (err == 0) + ret = xmlStrndup((BASE_PTR+startPosition), len); } return(ret); @@ -2856,51 +3023,42 @@ htmlParseSystemLiteral(htmlParserCtxtPtr ctxt) { static xmlChar * htmlParsePubidLiteral(htmlParserCtxtPtr ctxt) { size_t len = 0, startPosition = 0; + int err = 0; + int quote; xmlChar *ret = NULL; + + if ((CUR != '"') && (CUR != '\'')) { + htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, + "PubidLiteral \" or ' expected\n", NULL, NULL); + return(NULL); + } + quote = CUR; + NEXT; + /* * Name ::= (Letter | '_') (NameChar)* */ - if (CUR == '"') { - NEXT; - - if (CUR_PTR < BASE_PTR) - return(ret); - startPosition = CUR_PTR - BASE_PTR; - - while (IS_PUBIDCHAR_CH(CUR)) { - len++; - NEXT; + if (CUR_PTR < BASE_PTR) + return(ret); + startPosition = CUR_PTR - BASE_PTR; + + while ((CUR != 0) && (CUR != quote)) { + if (!IS_PUBIDCHAR_CH(CUR)) { + htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR, + "Invalid char in PubidLiteral 0x%X\n", CUR); + err = 1; } - - if (CUR != '"') { - htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, - "Unfinished PubidLiteral\n", NULL, NULL); - } else { - ret = xmlStrndup((BASE_PTR + startPosition), len); - NEXT; - } - } else if (CUR == '\'') { + len++; NEXT; + } - if (CUR_PTR < 
BASE_PTR) - return(ret); - startPosition = CUR_PTR - BASE_PTR; - - while ((IS_PUBIDCHAR_CH(CUR)) && (CUR != '\'')){ - len++; - NEXT; - } - - if (CUR != '\'') { - htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, - "Unfinished PubidLiteral\n", NULL, NULL); - } else { - ret = xmlStrndup((BASE_PTR + startPosition), len); - NEXT; - } + if (CUR != quote) { /* closing delimiter must match the opening quote (' or ") */ + htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, + "Unfinished PubidLiteral\n", NULL, NULL); } else { - htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, - "PubidLiteral \" or ' expected\n", NULL, NULL); + NEXT; + if (err == 0) + ret = xmlStrndup((BASE_PTR + startPosition), len); } return(ret); @@ -2935,7 +3093,7 @@ htmlParseScript(htmlParserCtxtPtr ctxt) { SHRINK; cur = CUR_CHAR(l); - while (IS_CHAR_CH(cur)) { + while (cur != 0) { if ((cur == '<') && (NXT(1) == '/')) { /* * One should break here, the specification is clear: @@ -2966,7 +3124,12 @@ htmlParseScript(htmlParserCtxtPtr ctxt) { } } } - COPY_BUF(l,buf,nbchar,cur); + if (IS_CHAR(cur)) { + COPY_BUF(l,buf,nbchar,cur); + } else { + htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR, + "Invalid char in CDATA 0x%X\n", cur); + } if (nbchar >= HTML_PARSER_BIG_BUFFER_SIZE) { buf[nbchar] = 0; if (ctxt->sax->cdataBlock!= NULL) { @@ -2984,14 +3147,6 @@ htmlParseScript(htmlParserCtxtPtr ctxt) { cur = CUR_CHAR(l); } - if ((!(IS_CHAR_CH(cur))) && (!((cur == 0) && (ctxt->progressive)))) { - htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR, - "Invalid char in CDATA 0x%X\n", cur); - if (ctxt->input->cur < ctxt->input->end) { - NEXT; - } - } - if ((nbchar != 0) && (ctxt->sax != NULL) && (!ctxt->disableSAX)) { buf[nbchar] = 0; if (ctxt->sax->cdataBlock!= NULL) { @@ -3239,7 +3394,7 @@ htmlParsePI(htmlParserCtxtPtr ctxt) { } SKIP_BLANKS; cur = CUR_CHAR(l); - while (IS_CHAR(cur) && (cur != '>')) { + while ((cur != 0) && (cur != '>')) { if (len + 5 >= size) { xmlChar *tmp; @@ -3258,7 +3413,13 @@ htmlParsePI(htmlParserCtxtPtr ctxt) { GROW; count = 0; } - COPY_BUF(l,buf,len,cur); + if (IS_CHAR(cur))
{ + COPY_BUF(l,buf,len,cur); + } else { + htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR, + "Invalid char in processing instruction " + "0x%X\n", cur); + } NEXTL(l); cur = CUR_CHAR(l); if (cur == 0) { @@ -3307,6 +3468,7 @@ htmlParseComment(htmlParserCtxtPtr ctxt) { int q, ql; int r, rl; int cur, l; + int next, nl; xmlParserInputState state; /* @@ -3328,17 +3490,32 @@ htmlParseComment(htmlParserCtxtPtr ctxt) { len = 0; buf[len] = 0; q = CUR_CHAR(ql); - if (!IS_CHAR(q)) + if (q == 0) goto unfinished; NEXTL(ql); r = CUR_CHAR(rl); - if (!IS_CHAR(r)) + if (r == 0) goto unfinished; NEXTL(rl); cur = CUR_CHAR(l); - while (IS_CHAR(cur) && + while ((cur != 0) && ((cur != '>') || (r != '-') || (q != '-'))) { + NEXTL(l); + next = CUR_CHAR(nl); + if (next == 0) { + SHRINK; + GROW; + next = CUR_CHAR(nl); + } + + if ((q == '-') && (r == '-') && (cur == '!') && (next == '>')) { + htmlParseErr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, + "Comment incorrectly closed by '--!>'", NULL, NULL); + cur = '>'; + break; + } + if (len + 5 >= size) { xmlChar *tmp; @@ -3352,21 +3529,22 @@ htmlParseComment(htmlParserCtxtPtr ctxt) { } buf = tmp; } - COPY_BUF(ql,buf,len,q); + if (IS_CHAR(q)) { + COPY_BUF(ql,buf,len,q); + } else { + htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR, + "Invalid char in comment 0x%X\n", q); + } + q = r; ql = rl; r = cur; rl = l; - NEXTL(l); - cur = CUR_CHAR(l); - if (cur == 0) { - SHRINK; - GROW; - cur = CUR_CHAR(l); - } + cur = next; + l = nl; } buf[len] = 0; - if (IS_CHAR(cur)) { + if (cur == '>') { NEXT; if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) && (!ctxt->disableSAX)) @@ -3407,13 +3585,16 @@ htmlParseCharRef(htmlParserCtxtPtr ctxt) { ((NXT(2) == 'x') || NXT(2) == 'X')) { SKIP(3); while (CUR != ';') { - if ((CUR >= '0') && (CUR <= '9')) - val = val * 16 + (CUR - '0'); - else if ((CUR >= 'a') && (CUR <= 'f')) - val = val * 16 + (CUR - 'a') + 10; - else if ((CUR >= 'A') && (CUR <= 'F')) - val = val * 16 + (CUR - 'A') + 10; - else { + if ((CUR >= '0') && (CUR <= '9')) { 
+ if (val < 0x110000) + val = val * 16 + (CUR - '0'); + } else if ((CUR >= 'a') && (CUR <= 'f')) { + if (val < 0x110000) + val = val * 16 + (CUR - 'a') + 10; + } else if ((CUR >= 'A') && (CUR <= 'F')) { + if (val < 0x110000) + val = val * 16 + (CUR - 'A') + 10; + } else { htmlParseErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, "htmlParseCharRef: missing semicolon\n", NULL, NULL); @@ -3426,9 +3607,10 @@ htmlParseCharRef(htmlParserCtxtPtr ctxt) { } else if ((CUR == '&') && (NXT(1) == '#')) { SKIP(2); while (CUR != ';') { - if ((CUR >= '0') && (CUR <= '9')) - val = val * 10 + (CUR - '0'); - else { + if ((CUR >= '0') && (CUR <= '9')) { + if (val < 0x110000) + val = val * 10 + (CUR - '0'); + } else { htmlParseErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, "htmlParseCharRef: missing semicolon\n", NULL, NULL); @@ -3447,6 +3629,9 @@ htmlParseCharRef(htmlParserCtxtPtr ctxt) { */ if (IS_CHAR(val)) { return(val); + } else if (val >= 0x110000) { + htmlParseErr(ctxt, XML_ERR_INVALID_CHAR, + "htmlParseCharRef: value too large\n", NULL, NULL); } else { htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR, "htmlParseCharRef: invalid xmlChar value %d\n", @@ -3506,9 +3691,12 @@ htmlParseDocTypeDecl(htmlParserCtxtPtr ctxt) { if (CUR != '>') { htmlParseErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, "DOCTYPE improperly terminated\n", NULL, NULL); - /* We shouldn't try to resynchronize ... */ + /* Ignore bogus content */ + while ((CUR != 0) && (CUR != '>')) + NEXT; } - NEXT; + if (CUR == '>') + NEXT; /* * Create or update the document accordingly to the DOCTYPE @@ -3786,7 +3974,7 @@ htmlParseStartTag(htmlParserCtxtPtr ctxt) { /* Dump the bogus tag like browsers do */ - while ((IS_CHAR_CH(CUR)) && (CUR != '>') && + while ((CUR != 0) && (CUR != '>') && (ctxt->instate != XML_PARSER_EOF)) NEXT; return -1; @@ -3842,11 +4030,9 @@ htmlParseStartTag(htmlParserCtxtPtr ctxt) { * (S Attribute)* S? 
*/ SKIP_BLANKS; - while ((IS_CHAR_CH(CUR)) && + while ((CUR != 0) && (CUR != '>') && ((CUR != '/') || (NXT(1) != '>'))) { - long cons = ctxt->nbChars; - GROW; attname = htmlParseAttribute(ctxt, &attvalue); if (attname != NULL) { @@ -3905,7 +4091,7 @@ htmlParseStartTag(htmlParserCtxtPtr ctxt) { xmlFree(attvalue); /* Dump the bogus attribute string up to the next blank or * the end of the tag. */ - while ((IS_CHAR_CH(CUR)) && + while ((CUR != 0) && !(IS_BLANK_CH(CUR)) && (CUR != '>') && ((CUR != '/') || (NXT(1) != '>'))) NEXT; @@ -3913,12 +4099,6 @@ htmlParseStartTag(htmlParserCtxtPtr ctxt) { failed: SKIP_BLANKS; - if (cons == ctxt->nbChars) { - htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, - "htmlParseStartTag: problem parsing attributes\n", - NULL, NULL); - break; - } } /* @@ -3986,19 +4166,14 @@ htmlParseEndTag(htmlParserCtxtPtr ctxt) * We should definitely be at the ending "S? '>'" part */ SKIP_BLANKS; - if ((!IS_CHAR_CH(CUR)) || (CUR != '>')) { + if (CUR != '>') { htmlParseErr(ctxt, XML_ERR_GT_REQUIRED, "End tag : expected '>'\n", NULL, NULL); - if (ctxt->recovery) { - /* - * We're not at the ending > !! - * Error, unless in recover mode where we search forwards - * until we find a > - */ - while (CUR != '\0' && CUR != '>') NEXT; - NEXT; - } - } else + /* Skip to next '>' */ + while ((CUR != 0) && (CUR != '>')) + NEXT; + } + if (CUR == '>') NEXT; /* @@ -4039,12 +4214,10 @@ htmlParseEndTag(htmlParserCtxtPtr ctxt) * With the exception that the autoclose may have popped stuff out * of the stack. 
*/ - if (!xmlStrEqual(name, ctxt->name)) { - if ((ctxt->name != NULL) && (!xmlStrEqual(ctxt->name, name))) { - htmlParseErr(ctxt, XML_ERR_TAG_NAME_MISMATCH, - "Opening and ending tag mismatch: %s and %s\n", - name, ctxt->name); - } + if ((ctxt->name != NULL) && (!xmlStrEqual(ctxt->name, name))) { + htmlParseErr(ctxt, XML_ERR_TAG_NAME_MISMATCH, + "Opening and ending tag mismatch: %s and %s\n", + name, ctxt->name); } /* @@ -4159,8 +4332,6 @@ htmlParseContent(htmlParserCtxtPtr ctxt) { currentNode = xmlStrdup(ctxt->name); depth = ctxt->nameNr; while (1) { - long cons = ctxt->nbChars; - GROW; if (ctxt->instate == XML_PARSER_EOF) @@ -4188,7 +4359,7 @@ htmlParseContent(htmlParserCtxtPtr ctxt) { "htmlParseStartTag: invalid element name\n", NULL, NULL); /* Dump the bogus tag like browsers do */ - while ((IS_CHAR_CH(CUR)) && (CUR != '>')) + while ((CUR != 0) && (CUR != '>')) NEXT; if (currentNode != NULL) @@ -4280,15 +4451,6 @@ htmlParseContent(htmlParserCtxtPtr ctxt) { else { htmlParseCharData(ctxt); } - - if (cons == ctxt->nbChars) { - if (ctxt->node != NULL) { - htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, - "detected an error in element content\n", - NULL, NULL); - } - break; - } } GROW; } @@ -4403,7 +4565,7 @@ htmlParseElement(htmlParserCtxtPtr ctxt) { */ currentNode = xmlStrdup(ctxt->name); depth = ctxt->nameNr; - while (IS_CHAR_CH(CUR)) { + while (CUR != 0) { oldptr = ctxt->input->cur; htmlParseContent(ctxt); if (oldptr==ctxt->input->cur) break; @@ -4420,7 +4582,7 @@ htmlParseElement(htmlParserCtxtPtr ctxt) { node_info.node = ctxt->node; xmlParserAddNodeInfo(ctxt, &node_info); } - if (!IS_CHAR_CH(CUR)) { + if (CUR == 0) { htmlAutoCloseOnEnd(ctxt); } @@ -4441,7 +4603,7 @@ htmlParserFinishElementParsing(htmlParserCtxtPtr ctxt) { xmlParserAddNodeInfo(ctxt, ctxt->nodeInfo); htmlNodeInfoPop(ctxt); } - if (!IS_CHAR_CH(CUR)) { + if (CUR == 0) { htmlAutoCloseOnEnd(ctxt); } } @@ -4559,8 +4721,6 @@ htmlParseContentInternal(htmlParserCtxtPtr ctxt) { currentNode = 
xmlStrdup(ctxt->name); depth = ctxt->nameNr; while (1) { - long cons = ctxt->nbChars; - GROW; if (ctxt->instate == XML_PARSER_EOF) @@ -4590,7 +4750,7 @@ htmlParseContentInternal(htmlParserCtxtPtr ctxt) { "htmlParseStartTag: invalid element name\n", NULL, NULL); /* Dump the bogus tag like browsers do */ - while ((IS_CHAR_CH(CUR)) && (CUR != '>')) + while ((CUR != 0) && (CUR != '>')) NEXT; htmlParserFinishElementParsing(ctxt); @@ -4694,15 +4854,6 @@ htmlParseContentInternal(htmlParserCtxtPtr ctxt) { else { htmlParseCharData(ctxt); } - - if (cons == ctxt->nbChars) { - if (ctxt->node != NULL) { - htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, - "detected an error in element content\n", - NULL, NULL); - } - break; - } } GROW; } @@ -4966,7 +5117,6 @@ htmlInitParserCtxt(htmlParserCtxtPtr ctxt) ctxt->vctxt.warning = xmlParserValidityWarning; ctxt->record_info = 0; ctxt->validate = 0; - ctxt->nbChars = 0; ctxt->checkIndex = 0; ctxt->catalogs = NULL; xmlInitNodeInfoSeq(&ctxt->node_seq); @@ -5126,7 +5276,7 @@ htmlCreateDocParserCtxt(const xmlChar *cur, const char *encoding) { * @first: the first char to lookup * @next: the next char to lookup or zero * @third: the next char to lookup or zero - * @comment: flag to force checking inside comments + * @ignoreattrval: skip over attribute values * * Try to find if a sequence (first, next, third) or just (first next) or * (first) is available in the input stream.
@@ -5140,13 +5290,11 @@ htmlCreateDocParserCtxt(const xmlChar *cur, const char *encoding) { */ static int htmlParseLookupSequence(htmlParserCtxtPtr ctxt, xmlChar first, - xmlChar next, xmlChar third, int iscomment, - int ignoreattrval) + xmlChar next, xmlChar third, int ignoreattrval) { int base, len; htmlParserInputPtr in; const xmlChar *buf; - int incomment = 0; int invalue = 0; char valdellim = 0x0; @@ -5158,8 +5306,11 @@ htmlParseLookupSequence(htmlParserCtxtPtr ctxt, xmlChar first, if (base < 0) return (-1); - if (ctxt->checkIndex > base) + if (ctxt->checkIndex > base) { base = ctxt->checkIndex; + /* Abuse hasPErefs member to restore current state. */ + invalue = ctxt->hasPErefs & 1 ? 1 : 0; + } if (in->buf == NULL) { buf = in->base; @@ -5175,14 +5326,6 @@ htmlParseLookupSequence(htmlParserCtxtPtr ctxt, xmlChar first, else if (next) len--; for (; base < len; base++) { - if ((!incomment) && (base + 4 < len) && (!iscomment)) { - if ((buf[base] == '<') && (buf[base + 1] == '!') && - (buf[base + 2] == '-') && (buf[base + 3] == '-')) { - incomment = 1; - /* do not increment past */ - base += 2; - } - } if (ignoreattrval) { if (buf[base] == '"' || buf[base] == '\'') { if (invalue) { @@ -5199,16 +5342,6 @@ htmlParseLookupSequence(htmlParserCtxtPtr ctxt, xmlChar first, continue; } } - if (incomment) { - if (base + 3 > len) - return (-1); - if ((buf[base] == '-') && (buf[base + 1] == '-') && - (buf[base + 2] == '>')) { - incomment = 0; - base += 2; - } - continue; - } if (buf[base] == first) { if (third != 0) { if ((buf[base + 1] != next) || (buf[base + 2] != third)) @@ -5235,8 +5368,12 @@ htmlParseLookupSequence(htmlParserCtxtPtr ctxt, xmlChar first, return (base - (in->cur - in->base)); } } - if ((!incomment) && (!invalue)) - ctxt->checkIndex = base; + ctxt->checkIndex = base; + /* Abuse hasPErefs member to track current state. 
*/ + if (invalue) + ctxt->hasPErefs |= 1; + else + ctxt->hasPErefs &= ~1; #ifdef DEBUG_PUSH if (next == 0) xmlGenericError(xmlGenericErrorContext, @@ -5253,79 +5390,38 @@ htmlParseLookupSequence(htmlParserCtxtPtr ctxt, xmlChar first, } /** - * htmlParseLookupChars: + * htmlParseLookupCommentEnd: * @ctxt: an HTML parser context - * @stop: Array of chars, which stop the lookup. - * @stopLen: Length of stop-Array * - * Try to find if any char of the stop-Array is available in the input - * stream. + * Try to find a comment end tag in the input stream + * The search includes "-->" as well as WHATWG-recommended incorrectly-closed tags. + * (See https://html.spec.whatwg.org/multipage/parsing.html#parse-error-incorrectly-closed-comment) * This function has a side effect of (possibly) incrementing ctxt->checkIndex * to avoid rescanning sequences of bytes, it DOES change the state of the * parser, do not use liberally. + * This wraps to htmlParseLookupSequence() * - * Returns the index to the current parsing point if a stopChar - * is available, -1 otherwise. + * Returns the index to the current parsing point if the full sequence is available, -1 otherwise. 
*/ static int -htmlParseLookupChars(htmlParserCtxtPtr ctxt, const xmlChar * stop, - int stopLen) +htmlParseLookupCommentEnd(htmlParserCtxtPtr ctxt) { - int base, len; - htmlParserInputPtr in; - const xmlChar *buf; - int incomment = 0; - int i; - - in = ctxt->input; - if (in == NULL) - return (-1); - - base = in->cur - in->base; - if (base < 0) - return (-1); - - if (ctxt->checkIndex > base) - base = ctxt->checkIndex; - - if (in->buf == NULL) { - buf = in->base; - len = in->length; - } else { - buf = xmlBufContent(in->buf->buffer); - len = xmlBufUse(in->buf->buffer); - } - - for (; base < len; base++) { - if (!incomment && (base + 4 < len)) { - if ((buf[base] == '<') && (buf[base + 1] == '!') && - (buf[base + 2] == '-') && (buf[base + 3] == '-')) { - incomment = 1; - /* do not increment past */ - base += 2; - } - } - if (incomment) { - if (base + 3 > len) - return (-1); - if ((buf[base] == '-') && (buf[base + 1] == '-') && - (buf[base + 2] == '>')) { - incomment = 0; - base += 2; - } - continue; - } - for (i = 0; i < stopLen; ++i) { - if (buf[base] == stop[i]) { - ctxt->checkIndex = 0; - return (base - (in->cur - in->base)); - } - } + int mark = 0; + int cur = CUR_PTR - BASE_PTR; + + while (mark >= 0) { + mark = htmlParseLookupSequence(ctxt, '-', '-', 0, 0); + if ((mark < 0) || + (NXT(mark+2) == '>') || + ((NXT(mark+2) == '!') && (NXT(mark+3) == '>'))) { + return mark; + } + ctxt->checkIndex = cur + mark + 1; } - ctxt->checkIndex = base; - return (-1); + return mark; } + /** * htmlParseTryOrFinish: * @ctxt: an HTML parser context @@ -5339,7 +5435,7 @@ static int htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { int ret = 0; htmlParserInputPtr in; - int avail = 0; + ptrdiff_t avail = 0; xmlChar cur, next; htmlParserNodeInfo node_info; @@ -5404,7 +5500,8 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { if (in->buf == NULL) avail = in->length - (in->cur - in->base); else - avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base); + avail = 
(ptrdiff_t)xmlBufUse(in->buf->buffer) - + (in->cur - in->base); if ((avail == 0) && (terminate)) { htmlAutoCloseOnEnd(ctxt); if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) { @@ -5418,6 +5515,12 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { } if (avail < 1) goto done; + /* + * This is done to make progress and avoid an infinite loop + * if a parsing attempt was aborted by hitting a NUL byte. After + * changing htmlCurrentChar, this probably isn't necessary anymore. + * We should consider removing this check. + */ cur = in->cur[0]; if (cur == 0) { SKIP(1); @@ -5440,7 +5543,8 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { if (in->buf == NULL) avail = in->length - (in->cur - in->base); else - avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base); + avail = (ptrdiff_t)xmlBufUse(in->buf->buffer) - + (in->cur - in->base); } if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) ctxt->sax->setDocumentLocator(ctxt->userData, @@ -5457,7 +5561,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { (UPP(6) == 'Y') && (UPP(7) == 'P') && (UPP(8) == 'E')) { if ((!terminate) && - (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0)) + (htmlParseLookupSequence(ctxt, '>', 0, 0, 1) < 0)) goto done; #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, @@ -5482,7 +5586,8 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { if (in->buf == NULL) avail = in->length - (in->cur - in->base); else - avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base); + avail = (ptrdiff_t)xmlBufUse(in->buf->buffer) - + (in->cur - in->base); /* * no chars in buffer */ @@ -5502,8 +5607,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { cur = in->cur[0]; if ((cur == '<') && (next == '!') && (in->cur[2] == '-') && (in->cur[3] == '-')) { - if ((!terminate) && - (htmlParseLookupSequence(ctxt, '-', '-', '>', 1, 1) < 0)) + if ((!terminate) && (htmlParseLookupCommentEnd(ctxt) < 0)) goto done; #ifdef DEBUG_PUSH 
xmlGenericError(xmlGenericErrorContext, @@ -5513,7 +5617,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { ctxt->instate = XML_PARSER_MISC; } else if ((cur == '<') && (next == '?')) { if ((!terminate) && - (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0)) + (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0)) goto done; #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, @@ -5527,7 +5631,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { (UPP(6) == 'Y') && (UPP(7) == 'P') && (UPP(8) == 'E')) { if ((!terminate) && - (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0)) + (htmlParseLookupSequence(ctxt, '>', 0, 0, 1) < 0)) goto done; #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, @@ -5543,7 +5647,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { (avail < 9)) { goto done; } else { - ctxt->instate = XML_PARSER_START_TAG; + ctxt->instate = XML_PARSER_CONTENT; #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, "HPP: entering START_TAG\n"); @@ -5555,15 +5659,15 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { if (in->buf == NULL) avail = in->length - (in->cur - in->base); else - avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base); + avail = (ptrdiff_t)xmlBufUse(in->buf->buffer) - + (in->cur - in->base); if (avail < 2) goto done; cur = in->cur[0]; next = in->cur[1]; if ((cur == '<') && (next == '!') && (in->cur[2] == '-') && (in->cur[3] == '-')) { - if ((!terminate) && - (htmlParseLookupSequence(ctxt, '-', '-', '>', 1, 1) < 0)) + if ((!terminate) && (htmlParseLookupCommentEnd(ctxt) < 0)) goto done; #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, @@ -5573,7 +5677,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { ctxt->instate = XML_PARSER_PROLOG; } else if ((cur == '<') && (next == '?')) { if ((!terminate) && - (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0)) + (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0)) goto done; #ifdef DEBUG_PUSH 
xmlGenericError(xmlGenericErrorContext, @@ -5585,7 +5689,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { (avail < 4)) { goto done; } else { - ctxt->instate = XML_PARSER_START_TAG; + ctxt->instate = XML_PARSER_CONTENT; #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, "HPP: entering START_TAG\n"); @@ -5596,7 +5700,8 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { if (in->buf == NULL) avail = in->length - (in->cur - in->base); else - avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base); + avail = (ptrdiff_t)xmlBufUse(in->buf->buffer) - + (in->cur - in->base); if (avail < 1) goto done; cur = in->cur[0]; @@ -5609,8 +5714,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { next = in->cur[1]; if ((cur == '<') && (next == '!') && (in->cur[2] == '-') && (in->cur[3] == '-')) { - if ((!terminate) && - (htmlParseLookupSequence(ctxt, '-', '-', '>', 1, 1) < 0)) + if ((!terminate) && (htmlParseLookupCommentEnd(ctxt) < 0)) goto done; #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, @@ -5620,7 +5724,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { ctxt->instate = XML_PARSER_EPILOG; } else if ((cur == '<') && (next == '?')) { if ((!terminate) && - (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0)) + (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0)) goto done; #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, @@ -5684,7 +5788,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { break; } if ((!terminate) && - (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0)) + (htmlParseLookupSequence(ctxt, '>', 0, 0, 1) < 0)) goto done; /* Capture start position */ @@ -5776,7 +5880,6 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { } case XML_PARSER_CONTENT: { xmlChar chr[2] = { 0, 0 }; - long cons; /* * Handle preparsed entities and charRef @@ -5821,7 +5924,6 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { goto done; cur = in->cur[0]; next = 
in->cur[1]; - cons = ctxt->nbChars; if ((xmlStrEqual(ctxt->name, BAD_CAST"script")) || (xmlStrEqual(ctxt->name, BAD_CAST"style"))) { /* @@ -5831,7 +5933,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { int idx; xmlChar val; - idx = htmlParseLookupSequence(ctxt, '<', '/', 0, 0, 0); + idx = htmlParseLookupSequence(ctxt, '<', '/', 0, 0); if (idx < 0) goto done; val = in->cur[idx + 2]; @@ -5858,7 +5960,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { (UPP(6) == 'Y') && (UPP(7) == 'P') && (UPP(8) == 'E')) { if ((!terminate) && - (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0)) + (htmlParseLookupSequence(ctxt, '>', 0, 0, 1) < 0)) goto done; htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR, "Misplaced DOCTYPE declaration\n", @@ -5866,9 +5968,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { htmlParseDocTypeDecl(ctxt); } else if ((cur == '<') && (next == '!') && (in->cur[2] == '-') && (in->cur[3] == '-')) { - if ((!terminate) && - (htmlParseLookupSequence( - ctxt, '-', '-', '>', 1, 1) < 0)) + if ((!terminate) && (htmlParseLookupCommentEnd(ctxt) < 0)) goto done; #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, @@ -5878,7 +5978,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { ctxt->instate = XML_PARSER_CONTENT; } else if ((cur == '<') && (next == '?')) { if ((!terminate) && - (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0)) + (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0)) goto done; #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, @@ -5897,24 +5997,35 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { #endif break; } else if (cur == '<') { - ctxt->instate = XML_PARSER_START_TAG; - ctxt->checkIndex = 0; + if ((!terminate) && (next == 0)) + goto done; + /* + * Only switch to START_TAG if the next character + * starts a valid name. Otherwise, htmlParseStartTag + * might return without consuming all characters + * up to the final '>'. 
+ */ + if ((IS_ASCII_LETTER(next)) || + (next == '_') || (next == ':') || (next == '.')) { + ctxt->instate = XML_PARSER_START_TAG; + ctxt->checkIndex = 0; #ifdef DEBUG_PUSH - xmlGenericError(xmlGenericErrorContext, - "HPP: entering START_TAG\n"); + xmlGenericError(xmlGenericErrorContext, + "HPP: entering START_TAG\n"); #endif + } else { + htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED, + "htmlParseTryOrFinish: " + "invalid element name\n", + NULL, NULL); + htmlCheckParagraph(ctxt); + if ((ctxt->sax != NULL) && + (ctxt->sax->characters != NULL)) + ctxt->sax->characters(ctxt->userData, + in->cur, 1); + NEXT; + } break; - } else if (cur == '&') { - if ((!terminate) && - (htmlParseLookupChars(ctxt, - BAD_CAST "; >/", 4) < 0)) - goto done; -#ifdef DEBUG_PUSH - xmlGenericError(xmlGenericErrorContext, - "HPP: Parsing Reference\n"); -#endif - /* TODO: check generation of subtrees if noent !!! */ - htmlParseReference(ctxt); } else { /* * check that the text sequence is complete @@ -5923,25 +6034,24 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { * data detection. 
*/ if ((!terminate) && - (htmlParseLookupChars(ctxt, BAD_CAST "<&", 2) < 0)) + (htmlParseLookupSequence(ctxt, '<', 0, 0, 0) < 0)) goto done; ctxt->checkIndex = 0; #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, "HPP: Parsing char data\n"); #endif - htmlParseCharData(ctxt); + while ((ctxt->instate != XML_PARSER_EOF) && + (cur != '<') && (in->cur < in->end)) { + if (cur == '&') { + htmlParseReference(ctxt); + } else { + htmlParseCharData(ctxt); + } + cur = in->cur[0]; + } } } - if (cons == ctxt->nbChars) { - if (ctxt->node != NULL) { - htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, - "detected an error in element content\n", - NULL, NULL); - } - NEXT; - break; - } break; } @@ -5949,7 +6059,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { if (avail < 2) goto done; if ((!terminate) && - (htmlParseLookupSequence(ctxt, '>', 0, 0, 0, 1) < 0)) + (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0)) goto done; htmlParseEndTag(ctxt); if (ctxt->nameNr == 0) { @@ -6131,12 +6241,12 @@ htmlParseChunk(htmlParserCtxtPtr ctxt, const char *chunk, int size, int res; res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk); + xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur); if (res < 0) { ctxt->errNo = XML_PARSER_EOF; ctxt->disableSAX = 1; return (XML_PARSER_EOF); } - xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur); #ifdef DEBUG_PUSH xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size); #endif @@ -6155,12 +6265,12 @@ htmlParseChunk(htmlParserCtxtPtr ctxt, const char *chunk, int size, size_t current = ctxt->input->cur - ctxt->input->base; nbchars = xmlCharEncInput(in, terminate); + xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current); if (nbchars < 0) { htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING, "encoder error\n", NULL, NULL); return(XML_ERR_INVALID_ENCODING); } - xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current); } } } @@ -6692,7 +6802,6 @@ htmlCtxtReset(htmlParserCtxtPtr 
ctxt) ctxt->vctxt.error = xmlParserValidityError; ctxt->vctxt.warning = xmlParserValidityWarning; ctxt->record_info = 0; - ctxt->nbChars = 0; ctxt->checkIndex = 0; ctxt->inSubset = 0; ctxt->errNo = XML_ERR_OK; diff --git a/third_party/libxml/src/HTMLtree.c b/third_party/libxml/src/HTMLtree.c index fe5d086f5a6a..7a2b85583892 100644 --- a/third_party/libxml/src/HTMLtree.c +++ b/third_party/libxml/src/HTMLtree.c @@ -518,7 +518,7 @@ htmlNodeDumpFileFormat(FILE *out, xmlDocPtr doc, buf = xmlOutputBufferCreateFile(out, handler); if (buf == NULL) return(0); - htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format); + htmlNodeDumpFormatOutput(buf, doc, cur, NULL, format); ret = xmlOutputBufferClose(buf); return(ret); @@ -670,13 +670,11 @@ htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, * @buf: the HTML buffer output * @doc: the document * @cur: the attribute pointer - * @encoding: the encoding string * * Dump an HTML attribute */ static void -htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, - const char *encoding ATTRIBUTE_UNUSED) { +htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) { xmlChar *value; /* @@ -706,49 +704,22 @@ htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, (!xmlStrcasecmp(cur->name, BAD_CAST "src")) || ((!xmlStrcasecmp(cur->name, BAD_CAST "name")) && (!xmlStrcasecmp(cur->parent->name, BAD_CAST "a"))))) { + xmlChar *escaped; xmlChar *tmp = value; - /* xmlURIEscapeStr() escapes '"' so it can be safely used. */ - xmlBufCCat(buf->buffer, "\""); while (IS_BLANK_CH(*tmp)) tmp++; - /* URI Escape everything, except server side includes. */ - for ( ; ; ) { - xmlChar *escaped; - xmlChar endChar; - xmlChar *end = NULL; - xmlChar *start = (xmlChar *)xmlStrstr(tmp, BAD_CAST ""); - if (end != NULL) { - *start = '\0'; - } - } - - /* Escape the whole string, or until start (set to '\0'). 
*/ - escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&,+"); - if (escaped != NULL) { - xmlBufCat(buf->buffer, escaped); - xmlFree(escaped); - } else { - xmlBufCat(buf->buffer, tmp); - } - - if (end == NULL) { /* Everything has been written. */ - break; - } - - /* Do not escape anything within server side includes. */ - *start = '<'; /* Restore the first character of "") */ - endChar = *end; - *end = '\0'; - xmlBufCat(buf->buffer, start); - *end = endChar; - tmp = end; + /* + * the < and > have already been escaped at the entity level + * And doing so here breaks server side includes + */ + escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&,+<>"); + if (escaped != NULL) { + xmlBufWriteQuotedString(buf->buffer, escaped); + xmlFree(escaped); + } else { + xmlBufWriteQuotedString(buf->buffer, value); } - - xmlBufCCat(buf->buffer, "\""); } else { xmlBufWriteQuotedString(buf->buffer, value); } @@ -759,63 +730,22 @@ htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, } } -/** - * htmlAttrListDumpOutput: - * @buf: the HTML buffer output - * @doc: the document - * @cur: the first attribute pointer - * @encoding: the encoding string - * - * Dump a list of HTML attributes - */ -static void -htmlAttrListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, const char *encoding) { - if (cur == NULL) { - return; - } - while (cur != NULL) { - htmlAttrDumpOutput(buf, doc, cur, encoding); - cur = cur->next; - } -} - - - -/** - * htmlNodeListDumpOutput: - * @buf: the HTML buffer output - * @doc: the document - * @cur: the first node - * @encoding: the encoding string - * @format: should formatting spaces been added - * - * Dump an HTML node list, recursive behaviour,children are printed too. 
- */ -static void -htmlNodeListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, - xmlNodePtr cur, const char *encoding, int format) { - if (cur == NULL) { - return; - } - while (cur != NULL) { - htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format); - cur = cur->next; - } -} - /** * htmlNodeDumpFormatOutput: * @buf: the HTML buffer output * @doc: the document * @cur: the current node - * @encoding: the encoding string + * @encoding: the encoding string (unused) * @format: should formatting spaces been added * * Dump an HTML node, recursive behaviour,children are printed too. */ void htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, - xmlNodePtr cur, const char *encoding, int format) { + xmlNodePtr cur, const char *encoding ATTRIBUTE_UNUSED, + int format) { + xmlNodePtr root, parent; + xmlAttrPtr attr; const htmlElemDesc * info; xmlInitParser(); @@ -823,172 +753,213 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, if ((cur == NULL) || (buf == NULL)) { return; } - /* - * Special cases. 
- */ - if (cur->type == XML_DTD_NODE) - return; - if ((cur->type == XML_HTML_DOCUMENT_NODE) || - (cur->type == XML_DOCUMENT_NODE)){ - htmlDocContentDumpOutput(buf, (xmlDocPtr) cur, encoding); - return; - } - if (cur->type == XML_ATTRIBUTE_NODE) { - htmlAttrDumpOutput(buf, doc, (xmlAttrPtr) cur, encoding); - return; - } - if (cur->type == HTML_TEXT_NODE) { - if (cur->content != NULL) { - if (((cur->name == (const xmlChar *)xmlStringText) || - (cur->name != (const xmlChar *)xmlStringTextNoenc)) && - ((cur->parent == NULL) || - ((xmlStrcasecmp(cur->parent->name, BAD_CAST "script")) && - (xmlStrcasecmp(cur->parent->name, BAD_CAST "style"))))) { - xmlChar *buffer; - - buffer = xmlEncodeEntitiesReentrant(doc, cur->content); - if (buffer != NULL) { - xmlOutputBufferWriteString(buf, (const char *)buffer); - xmlFree(buffer); - } - } else { - xmlOutputBufferWriteString(buf, (const char *)cur->content); - } - } - return; - } - if (cur->type == HTML_COMMENT_NODE) { - if (cur->content != NULL) { - xmlOutputBufferWriteString(buf, ""); - } - return; - } - if (cur->type == HTML_PI_NODE) { - if (cur->name == NULL) - return; - xmlOutputBufferWriteString(buf, "name); - if (cur->content != NULL) { - xmlOutputBufferWriteString(buf, " "); - xmlOutputBufferWriteString(buf, (const char *)cur->content); - } - xmlOutputBufferWriteString(buf, ">"); - return; - } - if (cur->type == HTML_ENTITY_REF_NODE) { - xmlOutputBufferWriteString(buf, "&"); - xmlOutputBufferWriteString(buf, (const char *)cur->name); - xmlOutputBufferWriteString(buf, ";"); - return; - } - if (cur->type == HTML_PRESERVE_NODE) { - if (cur->content != NULL) { - xmlOutputBufferWriteString(buf, (const char *)cur->content); - } - return; - } - /* - * Get specific HTML info for that node. 
- */ - if (cur->ns == NULL) - info = htmlTagLookup(cur->name); - else - info = NULL; + root = cur; + parent = cur->parent; + while (1) { + switch (cur->type) { + case XML_HTML_DOCUMENT_NODE: + case XML_DOCUMENT_NODE: + if (((xmlDocPtr) cur)->intSubset != NULL) { + htmlDtdDumpOutput(buf, (xmlDocPtr) cur, NULL); + } + if (cur->children != NULL) { + /* Always validate cur->parent when descending. */ + if (cur->parent == parent) { + parent = cur; + cur = cur->children; + continue; + } + } else { + xmlOutputBufferWriteString(buf, "\n"); + } + break; - xmlOutputBufferWriteString(buf, "<"); - if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) { - xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix); - xmlOutputBufferWriteString(buf, ":"); - } - xmlOutputBufferWriteString(buf, (const char *)cur->name); - if (cur->nsDef) - xmlNsListDumpOutput(buf, cur->nsDef); - if (cur->properties != NULL) - htmlAttrListDumpOutput(buf, doc, cur->properties, encoding); - - if ((info != NULL) && (info->empty)) { - xmlOutputBufferWriteString(buf, ">"); - if ((format) && (!info->isinline) && (cur->next != NULL)) { - if ((cur->next->type != HTML_TEXT_NODE) && - (cur->next->type != HTML_ENTITY_REF_NODE) && - (cur->parent != NULL) && - (cur->parent->name != NULL) && - (cur->parent->name[0] != 'p')) /* p, pre, param */ - xmlOutputBufferWriteString(buf, "\n"); - } - return; - } - if (((cur->type == XML_ELEMENT_NODE) || (cur->content == NULL)) && - (cur->children == NULL)) { - if ((info != NULL) && (info->saveEndTag != 0) && - (xmlStrcmp(BAD_CAST info->name, BAD_CAST "html")) && - (xmlStrcmp(BAD_CAST info->name, BAD_CAST "body"))) { - xmlOutputBufferWriteString(buf, ">"); - } else { - xmlOutputBufferWriteString(buf, ">parent != parent) && (cur->children != NULL)) { + htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format); + break; + } + + /* + * Get specific HTML info for that node. 
+ */ + if (cur->ns == NULL) + info = htmlTagLookup(cur->name); + else + info = NULL; + + xmlOutputBufferWriteString(buf, "<"); if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) { xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix); xmlOutputBufferWriteString(buf, ":"); } - xmlOutputBufferWriteString(buf, (const char *)cur->name); - xmlOutputBufferWriteString(buf, ">"); - } - if ((format) && (cur->next != NULL) && - (info != NULL) && (!info->isinline)) { - if ((cur->next->type != HTML_TEXT_NODE) && - (cur->next->type != HTML_ENTITY_REF_NODE) && - (cur->parent != NULL) && - (cur->parent->name != NULL) && - (cur->parent->name[0] != 'p')) /* p, pre, param */ - xmlOutputBufferWriteString(buf, "\n"); - } - return; - } - xmlOutputBufferWriteString(buf, ">"); - if ((cur->type != XML_ELEMENT_NODE) && - (cur->content != NULL)) { - /* - * Uses the OutputBuffer property to automatically convert - * invalids to charrefs - */ - - xmlOutputBufferWriteString(buf, (const char *) cur->content); - } - if (cur->children != NULL) { - if ((format) && (info != NULL) && (!info->isinline) && - (cur->children->type != HTML_TEXT_NODE) && - (cur->children->type != HTML_ENTITY_REF_NODE) && - (cur->children != cur->last) && - (cur->name != NULL) && - (cur->name[0] != 'p')) /* p, pre, param */ - xmlOutputBufferWriteString(buf, "\n"); - htmlNodeListDumpOutput(buf, doc, cur->children, encoding, format); - if ((format) && (info != NULL) && (!info->isinline) && - (cur->last->type != HTML_TEXT_NODE) && - (cur->last->type != HTML_ENTITY_REF_NODE) && - (cur->children != cur->last) && - (cur->name != NULL) && - (cur->name[0] != 'p')) /* p, pre, param */ - xmlOutputBufferWriteString(buf, "\n"); - } - xmlOutputBufferWriteString(buf, "ns != NULL) && (cur->ns->prefix != NULL)) { - xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix); - xmlOutputBufferWriteString(buf, ":"); - } - xmlOutputBufferWriteString(buf, (const char *)cur->name); - xmlOutputBufferWriteString(buf, ">"); - if 
((format) && (info != NULL) && (!info->isinline) && - (cur->next != NULL)) { - if ((cur->next->type != HTML_TEXT_NODE) && - (cur->next->type != HTML_ENTITY_REF_NODE) && - (cur->parent != NULL) && - (cur->parent->name != NULL) && - (cur->parent->name[0] != 'p')) /* p, pre, param */ - xmlOutputBufferWriteString(buf, "\n"); + xmlOutputBufferWriteString(buf, (const char *)cur->name); + if (cur->nsDef) + xmlNsListDumpOutput(buf, cur->nsDef); + attr = cur->properties; + while (attr != NULL) { + htmlAttrDumpOutput(buf, doc, attr); + attr = attr->next; + } + + if ((info != NULL) && (info->empty)) { + xmlOutputBufferWriteString(buf, ">"); + } else if (cur->children == NULL) { + if ((info != NULL) && (info->saveEndTag != 0) && + (xmlStrcmp(BAD_CAST info->name, BAD_CAST "html")) && + (xmlStrcmp(BAD_CAST info->name, BAD_CAST "body"))) { + xmlOutputBufferWriteString(buf, ">"); + } else { + xmlOutputBufferWriteString(buf, ">ns != NULL) && (cur->ns->prefix != NULL)) { + xmlOutputBufferWriteString(buf, + (const char *)cur->ns->prefix); + xmlOutputBufferWriteString(buf, ":"); + } + xmlOutputBufferWriteString(buf, (const char *)cur->name); + xmlOutputBufferWriteString(buf, ">"); + } + } else { + xmlOutputBufferWriteString(buf, ">"); + if ((format) && (info != NULL) && (!info->isinline) && + (cur->children->type != HTML_TEXT_NODE) && + (cur->children->type != HTML_ENTITY_REF_NODE) && + (cur->children != cur->last) && + (cur->name != NULL) && + (cur->name[0] != 'p')) /* p, pre, param */ + xmlOutputBufferWriteString(buf, "\n"); + parent = cur; + cur = cur->children; + continue; + } + + if ((format) && (cur->next != NULL) && + (info != NULL) && (!info->isinline)) { + if ((cur->next->type != HTML_TEXT_NODE) && + (cur->next->type != HTML_ENTITY_REF_NODE) && + (parent != NULL) && + (parent->name != NULL) && + (parent->name[0] != 'p')) /* p, pre, param */ + xmlOutputBufferWriteString(buf, "\n"); + } + + break; + + case XML_ATTRIBUTE_NODE: + htmlAttrDumpOutput(buf, doc, (xmlAttrPtr) cur); + 
break; + + case HTML_TEXT_NODE: + if (cur->content == NULL) + break; + if (((cur->name == (const xmlChar *)xmlStringText) || + (cur->name != (const xmlChar *)xmlStringTextNoenc)) && + ((parent == NULL) || + ((xmlStrcasecmp(parent->name, BAD_CAST "script")) && + (xmlStrcasecmp(parent->name, BAD_CAST "style"))))) { + xmlChar *buffer; + + buffer = xmlEncodeEntitiesReentrant(doc, cur->content); + if (buffer != NULL) { + xmlOutputBufferWriteString(buf, (const char *)buffer); + xmlFree(buffer); + } + } else { + xmlOutputBufferWriteString(buf, (const char *)cur->content); + } + break; + + case HTML_COMMENT_NODE: + if (cur->content != NULL) { + xmlOutputBufferWriteString(buf, ""); + } + break; + + case HTML_PI_NODE: + if (cur->name != NULL) { + xmlOutputBufferWriteString(buf, "name); + if (cur->content != NULL) { + xmlOutputBufferWriteString(buf, " "); + xmlOutputBufferWriteString(buf, + (const char *)cur->content); + } + xmlOutputBufferWriteString(buf, ">"); + } + break; + + case HTML_ENTITY_REF_NODE: + xmlOutputBufferWriteString(buf, "&"); + xmlOutputBufferWriteString(buf, (const char *)cur->name); + xmlOutputBufferWriteString(buf, ";"); + break; + + case HTML_PRESERVE_NODE: + if (cur->content != NULL) { + xmlOutputBufferWriteString(buf, (const char *)cur->content); + } + break; + + default: + break; + } + + while (1) { + if (cur == root) + return; + if (cur->next != NULL) { + cur = cur->next; + break; + } + + cur = parent; + /* cur->parent was validated when descending. 
*/ + parent = cur->parent; + + if ((cur->type == XML_HTML_DOCUMENT_NODE) || + (cur->type == XML_DOCUMENT_NODE)) { + xmlOutputBufferWriteString(buf, "\n"); + } else { + if ((format) && (cur->ns == NULL)) + info = htmlTagLookup(cur->name); + else + info = NULL; + + if ((format) && (info != NULL) && (!info->isinline) && + (cur->last->type != HTML_TEXT_NODE) && + (cur->last->type != HTML_ENTITY_REF_NODE) && + (cur->children != cur->last) && + (cur->name != NULL) && + (cur->name[0] != 'p')) /* p, pre, param */ + xmlOutputBufferWriteString(buf, "\n"); + + xmlOutputBufferWriteString(buf, "ns != NULL) && (cur->ns->prefix != NULL)) { + xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix); + xmlOutputBufferWriteString(buf, ":"); + } + xmlOutputBufferWriteString(buf, (const char *)cur->name); + xmlOutputBufferWriteString(buf, ">"); + + if ((format) && (info != NULL) && (!info->isinline) && + (cur->next != NULL)) { + if ((cur->next->type != HTML_TEXT_NODE) && + (cur->next->type != HTML_ENTITY_REF_NODE) && + (parent != NULL) && + (parent->name != NULL) && + (parent->name[0] != 'p')) /* p, pre, param */ + xmlOutputBufferWriteString(buf, "\n"); + } + } + } } } @@ -997,63 +968,45 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, * @buf: the HTML buffer output * @doc: the document * @cur: the current node - * @encoding: the encoding string + * @encoding: the encoding string (unused) * * Dump an HTML node, recursive behaviour,children are printed too, * and formatting returns/spaces are added. 
*/ void htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, - xmlNodePtr cur, const char *encoding) { - htmlNodeDumpFormatOutput(buf, doc, cur, encoding, 1); + xmlNodePtr cur, const char *encoding ATTRIBUTE_UNUSED) { + htmlNodeDumpFormatOutput(buf, doc, cur, NULL, 1); } /** * htmlDocContentDumpFormatOutput: * @buf: the HTML buffer output * @cur: the document - * @encoding: the encoding string + * @encoding: the encoding string (unused) * @format: should formatting spaces been added * * Dump an HTML document. */ void htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr cur, - const char *encoding, int format) { - int type; - - xmlInitParser(); - - if ((buf == NULL) || (cur == NULL)) - return; - - /* - * force to output the stuff as HTML, especially for entities - */ - type = cur->type; - cur->type = XML_HTML_DOCUMENT_NODE; - if (cur->intSubset != NULL) { - htmlDtdDumpOutput(buf, cur, NULL); - } - if (cur->children != NULL) { - htmlNodeListDumpOutput(buf, cur, cur->children, encoding, format); - } - xmlOutputBufferWriteString(buf, "\n"); - cur->type = (xmlElementType) type; + const char *encoding ATTRIBUTE_UNUSED, + int format) { + htmlNodeDumpFormatOutput(buf, cur, (xmlNodePtr) cur, NULL, format); } /** * htmlDocContentDumpOutput: * @buf: the HTML buffer output * @cur: the document - * @encoding: the encoding string + * @encoding: the encoding string (unused) * * Dump an HTML document. Formatting return/spaces are added. 
*/ void htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur, - const char *encoding) { - htmlDocContentDumpFormatOutput(buf, cur, encoding, 1); + const char *encoding ATTRIBUTE_UNUSED) { + htmlNodeDumpFormatOutput(buf, cur, (xmlNodePtr) cur, NULL, 1); } /************************************************************************ diff --git a/third_party/libxml/src/Makefile.am b/third_party/libxml/src/Makefile.am index be1a883db970..a9284b95b6f7 100644 --- a/third_party/libxml/src/Makefile.am +++ b/third_party/libxml/src/Makefile.am @@ -2,9 +2,9 @@ ACLOCAL_AMFLAGS = -I m4 -SUBDIRS = include . doc example xstc $(PYTHON_SUBDIR) +SUBDIRS = include . doc example fuzz xstc $(PYTHON_SUBDIR) -DIST_SUBDIRS = include . doc example python xstc +DIST_SUBDIRS = include . doc example fuzz python xstc AM_CPPFLAGS = -I$(top_builddir)/include -I$(srcdir)/include @@ -210,6 +210,7 @@ runtests: runtest$(EXEEXT) testrecurse$(EXEEXT) testapi$(EXEEXT) \ $(CHECKER) ./runxmlconf$(EXEEXT) @(if [ "$(PYTHON_SUBDIR)" != "" ] ; then cd python ; \ $(MAKE) tests ; fi) + @cd fuzz; $(MAKE) tests check: all runtests @@ -906,14 +907,16 @@ Regexptests: testRegexp$(EXEEXT) if [ ! -d $$i ] ; then \ if [ ! -f $(srcdir)/result/regexp/$$name ] ; then \ echo New test file $$name ; \ - $(CHECKER) $(top_builddir)/testRegexp -i $$i > $(srcdir)/result/regexp/$$name; \ + $(CHECKER) $(top_builddir)/testRegexp -i $$i > $(srcdir)/result/regexp/$$name 2> $(srcdir)/result/regexp/$$name.err ; \ + if [ ! 
-s "$(srcdir)/result/regexp/$$name.err" ] ; then rm $(srcdir)/result/regexp/$$name.err; fi ; \ grep "MORY ALLO" .memdump | grep -v "MEMORY ALLOCATED : 0";\ else \ - log=`$(CHECKER) $(top_builddir)/testRegexp -i $$i 2>&1 > result.$$name ; \ + log=`$(CHECKER) $(top_builddir)/testRegexp -i $$i > result.$$name 2> error.$$name ; \ grep "MORY ALLO" .memdump | grep -v "MEMORY ALLOCATED : 0";\ - diff $(srcdir)/result/regexp/$$name result.$$name` ; \ - if [ -n "$$log" ] ; then echo $$name result ; echo "$$log" ; fi ; \ - rm result.$$name ; \ + diff $(srcdir)/result/regexp/$$name result.$$name ; \ + if [ -s "$(srcdir)/result/regexp/$$name.err" -o -s "error.$$name" ] ; then diff $(srcdir)/result/regexp/$$name.err error.$$name ; fi` ; \ + if [ -n "$$log" ] ; then echo $$name result ; echo $$log ; fi ; \ + rm result.$$name error.$$name ; \ fi ; fi ; done) # Disabled for now @@ -1257,6 +1260,7 @@ EXTRA_DIST = xml2-config.in xml2Conf.sh.in libxml.spec.in libxml2.spec \ dbgen.pl dbgenattr.pl regressions.py regressions.xml \ README.tests Makefile.tests libxml2.syms timsort.h \ README.zOS \ + CMakeLists.txt config.h.cmake.in libxml2-config.cmake.cmake.in \ $(CVS_EXTRA_DIST) diff --git a/third_party/libxml/src/SAX2.c b/third_party/libxml/src/SAX2.c index 9df01840418e..99019a984cd6 100644 --- a/third_party/libxml/src/SAX2.c +++ b/third_party/libxml/src/SAX2.c @@ -1663,23 +1663,23 @@ xmlSAX2StartElement(void *ctx, const xmlChar *fullname, const xmlChar **atts) } } - /* - * Insert all the defaulted attributes from the DTD especially namespaces - */ - if ((!ctxt->html) && - ((ctxt->myDoc->intSubset != NULL) || - (ctxt->myDoc->extSubset != NULL))) { - xmlCheckDefaultedAttributes(ctxt, name, prefix, atts); - } + if (!ctxt->html) { + /* + * Insert all the defaulted attributes from the DTD especially + * namespaces + */ + if ((ctxt->myDoc->intSubset != NULL) || + (ctxt->myDoc->extSubset != NULL)) { + xmlCheckDefaultedAttributes(ctxt, name, prefix, atts); + } - /* - * process all the 
attributes whose name start with "xmlns" - */ - if (atts != NULL) { - i = 0; - att = atts[i++]; - value = atts[i++]; - if (!ctxt->html) { + /* + * process all the attributes whose name start with "xmlns" + */ + if (atts != NULL) { + i = 0; + att = atts[i++]; + value = atts[i++]; while ((att != NULL) && (value != NULL)) { if ((att[0] == 'x') && (att[1] == 'm') && (att[2] == 'l') && (att[3] == 'n') && (att[4] == 's')) @@ -1688,30 +1688,30 @@ xmlSAX2StartElement(void *ctx, const xmlChar *fullname, const xmlChar **atts) att = atts[i++]; value = atts[i++]; } - } - } + } - /* - * Search the namespace, note that since the attributes have been - * processed, the local namespaces are available. - */ - ns = xmlSearchNs(ctxt->myDoc, ret, prefix); - if ((ns == NULL) && (parent != NULL)) - ns = xmlSearchNs(ctxt->myDoc, parent, prefix); - if ((prefix != NULL) && (ns == NULL)) { - ns = xmlNewNs(ret, NULL, prefix); - xmlNsWarnMsg(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE, - "Namespace prefix %s is not defined\n", - prefix, NULL); - } + /* + * Search the namespace, note that since the attributes have been + * processed, the local namespaces are available. 
+ */ + ns = xmlSearchNs(ctxt->myDoc, ret, prefix); + if ((ns == NULL) && (parent != NULL)) + ns = xmlSearchNs(ctxt->myDoc, parent, prefix); + if ((prefix != NULL) && (ns == NULL)) { + ns = xmlNewNs(ret, NULL, prefix); + xmlNsWarnMsg(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE, + "Namespace prefix %s is not defined\n", + prefix, NULL); + } - /* - * set the namespace node, making sure that if the default namespace - * is unbound on a parent we simply keep it NULL - */ - if ((ns != NULL) && (ns->href != NULL) && - ((ns->href[0] != 0) || (ns->prefix != NULL))) - xmlSetNs(ret, ns); + /* + * set the namespace node, making sure that if the default namespace + * is unbound on a parent we simply keep it NULL + */ + if ((ns != NULL) && (ns->href != NULL) && + ((ns->href[0] != 0) || (ns->prefix != NULL))) + xmlSetNs(ret, ns); + } /* * process all the other attributes @@ -2493,20 +2493,21 @@ xmlSAX2Reference(void *ctx, const xmlChar *name) } /** - * xmlSAX2Characters: + * xmlSAX2Text: * @ctx: the user data (XML parser context) * @ch: a xmlChar string * @len: the number of xmlChar + * @type: text or cdata * - * receiving some chars from the parser. + * Append characters. */ -void -xmlSAX2Characters(void *ctx, const xmlChar *ch, int len) +static void +xmlSAX2Text(xmlParserCtxtPtr ctxt, const xmlChar *ch, int len, + xmlElementType type) { - xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; xmlNodePtr lastChild; - if (ctx == NULL) return; + if (ctxt == NULL) return; #ifdef DEBUG_SAX xmlGenericError(xmlGenericErrorContext, "SAX.xmlSAX2Characters(%.30s, %d)\n", ch, len); @@ -2535,7 +2536,10 @@ xmlSAX2Characters(void *ctx, const xmlChar *ch, int len) * elements. Use an attribute in the structure !!! 
*/ if (lastChild == NULL) { - lastChild = xmlSAX2TextNode(ctxt, ch, len); + if (type == XML_TEXT_NODE) + lastChild = xmlSAX2TextNode(ctxt, ch, len); + else + lastChild = xmlNewCDataBlock(ctxt->myDoc, ch, len); if (lastChild != NULL) { ctxt->node->children = lastChild; ctxt->node->last = lastChild; @@ -2549,8 +2553,9 @@ xmlSAX2Characters(void *ctx, const xmlChar *ch, int len) } } else { int coalesceText = (lastChild != NULL) && - (lastChild->type == XML_TEXT_NODE) && - (lastChild->name == xmlStringText); + (lastChild->type == type) && + ((type != XML_TEXT_NODE) || + (lastChild->name == xmlStringText)); if ((coalesceText) && (ctxt->nodemem != 0)) { /* * The whole point of maintaining nodelen and nodemem, @@ -2607,7 +2612,10 @@ xmlSAX2Characters(void *ctx, const xmlChar *ch, int len) } } else { /* Mixed content, first time */ - lastChild = xmlSAX2TextNode(ctxt, ch, len); + if (type == XML_TEXT_NODE) + lastChild = xmlSAX2TextNode(ctxt, ch, len); + else + lastChild = xmlNewCDataBlock(ctxt->myDoc, ch, len); if (lastChild != NULL) { xmlAddChild(ctxt->node, lastChild); if (ctxt->node->children != NULL) { @@ -2619,6 +2627,20 @@ xmlSAX2Characters(void *ctx, const xmlChar *ch, int len) } } +/** + * xmlSAX2Characters: + * @ctx: the user data (XML parser context) + * @ch: a xmlChar string + * @len: the number of xmlChar + * + * receiving some chars from the parser. 
+ */ +void +xmlSAX2Characters(void *ctx, const xmlChar *ch, int len) +{ + xmlSAX2Text((xmlParserCtxtPtr) ctx, ch, len, XML_TEXT_NODE); +} + /** * xmlSAX2IgnorableWhitespace: * @ctx: the user data (XML parser context) @@ -2775,27 +2797,7 @@ xmlSAX2Comment(void *ctx, const xmlChar *value) void xmlSAX2CDataBlock(void *ctx, const xmlChar *value, int len) { - xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; - xmlNodePtr ret, lastChild; - - if (ctx == NULL) return; -#ifdef DEBUG_SAX - xmlGenericError(xmlGenericErrorContext, - "SAX.pcdata(%.10s, %d)\n", value, len); -#endif - lastChild = xmlGetLastChild(ctxt->node); -#ifdef DEBUG_SAX_TREE - xmlGenericError(xmlGenericErrorContext, - "add chars to %s \n", ctxt->node->name); -#endif - if ((lastChild != NULL) && - (lastChild->type == XML_CDATA_SECTION_NODE)) { - xmlTextConcat(lastChild, value, len); - } else { - ret = xmlNewCDataBlock(ctxt->myDoc, value, len); - if (xmlAddChild(ctxt->node, ret) == NULL) - xmlFreeNode(ret); - } + xmlSAX2Text((xmlParserCtxtPtr) ctx, value, len, XML_CDATA_SECTION_NODE); } static int xmlSAX2DefaultVersionValue = 2; diff --git a/third_party/libxml/src/aclocal.m4 b/third_party/libxml/src/aclocal.m4 index 1fc80003ea3d..a671949bfa6b 100644 --- a/third_party/libxml/src/aclocal.m4 +++ b/third_party/libxml/src/aclocal.m4 @@ -1,4 +1,4 @@ -# generated automatically by aclocal 1.16.2 -*- Autoconf -*- +# generated automatically by aclocal 1.16.3 -*- Autoconf -*- # Copyright (C) 1996-2020 Free Software Foundation, Inc. @@ -14,8 +14,8 @@ m4_ifndef([AC_CONFIG_MACRO_DIRS], [m4_defun([_AM_CONFIG_MACRO_DIRS], [])m4_defun([AC_CONFIG_MACRO_DIRS], [_AM_CONFIG_MACRO_DIRS($@)])]) m4_ifndef([AC_AUTOCONF_VERSION], [m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl -m4_if(m4_defn([AC_AUTOCONF_VERSION]), [2.69],, -[m4_warning([this file was generated for autoconf 2.69. +m4_if(m4_defn([AC_AUTOCONF_VERSION]), [2.71],, +[m4_warning([this file was generated for autoconf 2.71. You have another version of autoconf. 
It may work, but is not guaranteed to. If you have problems, you may need to regenerate the build system entirely. To do so, use the procedure documented by the package, typically 'autoreconf'.])]) @@ -311,7 +311,7 @@ AC_DEFUN([AM_AUTOMAKE_VERSION], [am__api_version='1.16' dnl Some users find AM_AUTOMAKE_VERSION and mistake it for a way to dnl require some minimum version. Point them to the right macro. -m4_if([$1], [1.16.2], [], +m4_if([$1], [1.16.3], [], [AC_FATAL([Do not call $0, use AM_INIT_AUTOMAKE([$1]).])])dnl ]) @@ -327,7 +327,7 @@ m4_define([_AM_AUTOCONF_VERSION], []) # Call AM_AUTOMAKE_VERSION and AM_AUTOMAKE_VERSION so they can be traced. # This function is AC_REQUIREd by AM_INIT_AUTOMAKE. AC_DEFUN([AM_SET_CURRENT_AUTOMAKE_VERSION], -[AM_AUTOMAKE_VERSION([1.16.2])dnl +[AM_AUTOMAKE_VERSION([1.16.3])dnl m4_ifndef([AC_AUTOCONF_VERSION], [m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl _AM_AUTOCONF_VERSION(m4_defn([AC_AUTOCONF_VERSION]))]) @@ -1015,12 +1015,7 @@ AC_DEFUN([AM_MISSING_HAS_RUN], [AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl AC_REQUIRE_AUX_FILE([missing])dnl if test x"${MISSING+set}" != xset; then - case $am_aux_dir in - *\ * | *\ *) - MISSING="\${SHELL} \"$am_aux_dir/missing\"" ;; - *) - MISSING="\${SHELL} $am_aux_dir/missing" ;; - esac + MISSING="\${SHELL} '$am_aux_dir/missing'" fi # Use eval to expand $SHELL if eval "$MISSING --is-lightweight"; then diff --git a/third_party/libxml/src/buf.c b/third_party/libxml/src/buf.c index 8ad18a1ef98b..24368d379f82 100644 --- a/third_party/libxml/src/buf.c +++ b/third_party/libxml/src/buf.c @@ -1334,8 +1334,12 @@ xmlBufGetInputBase(xmlBufPtr buf, xmlParserInputPtr input) { int xmlBufSetInputBaseCur(xmlBufPtr buf, xmlParserInputPtr input, size_t base, size_t cur) { - if ((input == NULL) || (buf == NULL) || (buf->error)) + if (input == NULL) + return(-1); + if ((buf == NULL) || (buf->error)) { + input->base = input->cur = input->end = BAD_CAST ""; return(-1); + } CHECK_COMPAT(buf) input->base = 
&buf->content[base]; input->cur = input->base + cur; diff --git a/third_party/libxml/src/config.h.cmake.in b/third_party/libxml/src/config.h.cmake.in new file mode 100644 index 000000000000..22b3c9207a2d --- /dev/null +++ b/third_party/libxml/src/config.h.cmake.in @@ -0,0 +1,288 @@ +/* config.h. Generated from config.h.in by configure. */ +/* config.h.in. Generated from configure.ac by autoheader. */ + +/* Define if __attribute__((destructor)) is accepted */ +#cmakedefine ATTRIBUTE_DESTRUCTOR 1 + +/* Type cast for the gethostbyname() argument */ +#cmakedefine GETHOSTBYNAME_ARG_CAST @GETHOSTBYNAME_ARG_CAST@ + +/* Define to 1 if you have the <arpa/inet.h> header file. */ +#cmakedefine HAVE_ARPA_INET_H 1 + +/* Define to 1 if you have the <arpa/nameser.h> header file. */ +#cmakedefine HAVE_ARPA_NAMESER_H 1 + +/* Whether struct sockaddr::__ss_family exists */ +#cmakedefine HAVE_BROKEN_SS_FAMILY 1 + +/* Define to 1 if you have the <ctype.h> header file. */ +#cmakedefine HAVE_CTYPE_H 1 + +/* Define to 1 if you have the <dirent.h> header file. */ +#cmakedefine HAVE_DIRENT_H 1 + +/* Define to 1 if you have the <dlfcn.h> header file. */ +#cmakedefine HAVE_DLFCN_H 1 + +/* Have dlopen based dso */ +#cmakedefine HAVE_DLOPEN 1 + +/* Define to 1 if you have the <dl.h> header file. */ +#cmakedefine HAVE_DL_H 1 + +/* Define to 1 if you have the <errno.h> header file. */ +#cmakedefine HAVE_ERRNO_H 1 + +/* Define to 1 if you have the <fcntl.h> header file. */ +#cmakedefine HAVE_FCNTL_H 1 + +/* Define to 1 if you have the <float.h> header file. */ +#cmakedefine HAVE_FLOAT_H 1 + +/* Define to 1 if you have the `fprintf' function. */ +#cmakedefine HAVE_FPRINTF 1 + +/* Define to 1 if you have the `ftime' function. */ +#cmakedefine HAVE_FTIME 1 + +/* Define if getaddrinfo is there */ +#cmakedefine HAVE_GETADDRINFO 1 + +/* Define to 1 if you have the `gettimeofday' function. */ +#cmakedefine HAVE_GETTIMEOFDAY 1 + +/* Define to 1 if you have the <inttypes.h> header file. */ +#cmakedefine HAVE_INTTYPES_H 1 + +/* Define to 1 if you have the `isascii' function. 
*/ +#cmakedefine HAVE_ISASCII 1 + +/* Define if isinf is there */ +#cmakedefine HAVE_ISINF 1 + +/* Define if isnan is there */ +#cmakedefine HAVE_ISNAN 1 + +/* Define if history library is there (-lhistory) */ +#cmakedefine HAVE_LIBHISTORY 1 + +/* Define if pthread library is there (-lpthread) */ +#cmakedefine HAVE_LIBPTHREAD 1 + +/* Define if readline library is there (-lreadline) */ +#cmakedefine HAVE_LIBREADLINE 1 + +/* Define to 1 if you have the <limits.h> header file. */ +#cmakedefine HAVE_LIMITS_H 1 + +/* Define to 1 if you have the `localtime' function. */ +#cmakedefine HAVE_LOCALTIME 1 + +/* Define to 1 if you have the <lzma.h> header file. */ +#cmakedefine HAVE_LZMA_H 1 + +/* Define to 1 if you have the <malloc.h> header file. */ +#cmakedefine HAVE_MALLOC_H 1 + +/* Define to 1 if you have the <math.h> header file. */ +#cmakedefine HAVE_MATH_H 1 + +/* Define to 1 if you have the <memory.h> header file. */ +#cmakedefine HAVE_MEMORY_H 1 + +/* Define to 1 if you have the `mmap' function. */ +#cmakedefine HAVE_MMAP 1 + +/* Define to 1 if you have the `munmap' function. */ +#cmakedefine HAVE_MUNMAP 1 + +/* mmap() is no good without munmap() */ +#if defined(HAVE_MMAP) && !defined(HAVE_MUNMAP) +# undef /**/ HAVE_MMAP +#endif + +/* Define to 1 if you have the <ndir.h> header file, and it defines `DIR'. */ +#cmakedefine HAVE_NDIR_H 1 + +/* Define to 1 if you have the <netdb.h> header file. */ +#cmakedefine HAVE_NETDB_H 1 + +/* Define to 1 if you have the <netinet/in.h> header file. */ +#cmakedefine HAVE_NETINET_IN_H 1 + +/* Define to 1 if you have the <poll.h> header file. */ +#cmakedefine HAVE_POLL_H 1 + +/* Define to 1 if you have the `printf' function. */ +#cmakedefine HAVE_PRINTF 1 + +/* Define if <pthread.h> is there */ +#cmakedefine HAVE_PTHREAD_H 1 + +/* Define to 1 if you have the `putenv' function. */ +#cmakedefine HAVE_PUTENV 1 + +/* Define to 1 if you have the `rand' function. */ +#cmakedefine HAVE_RAND 1 + +/* Define to 1 if you have the `rand_r' function. */ +#cmakedefine HAVE_RAND_R 1 + +/* Define to 1 if you have the <resolv.h> header file. 
*/ +#cmakedefine HAVE_RESOLV_H 1 + +/* Have shl_load based dso */ +#cmakedefine HAVE_SHLLOAD 1 + +/* Define to 1 if you have the `signal' function. */ +#cmakedefine HAVE_SIGNAL 1 + +/* Define to 1 if you have the header file. */ +#cmakedefine HAVE_SIGNAL_H 1 + +/* Define to 1 if you have the `snprintf' function. */ +#cmakedefine HAVE_SNPRINTF 1 + +/* Define to 1 if you have the `sprintf' function. */ +#cmakedefine HAVE_SPRINTF 1 + +/* Define to 1 if you have the `srand' function. */ +#cmakedefine HAVE_SRAND 1 + +/* Define to 1 if you have the `sscanf' function. */ +#cmakedefine HAVE_SSCANF 1 + +/* Define to 1 if you have the `stat' function. */ +#cmakedefine HAVE_STAT 1 + +/* Define to 1 if you have the header file. */ +#cmakedefine HAVE_STDARG_H 1 + +/* Define to 1 if you have the header file. */ +#cmakedefine HAVE_STDINT_H 1 + +/* Define to 1 if you have the header file. */ +#cmakedefine HAVE_STDLIB_H 1 + +/* Define to 1 if you have the `strftime' function. */ +#cmakedefine HAVE_STRFTIME 1 + +/* Define to 1 if you have the header file. */ +#cmakedefine HAVE_STRINGS_H 1 + +/* Define to 1 if you have the header file. */ +#cmakedefine HAVE_STRING_H 1 + +/* Define to 1 if you have the header file, and it defines `DIR'. + */ +#cmakedefine HAVE_SYS_DIR_H 1 + +/* Define to 1 if you have the header file. */ +#cmakedefine HAVE_SYS_MMAN_H 1 + +/* Define to 1 if you have the header file, and it defines `DIR'. + */ +#cmakedefine HAVE_SYS_NDIR_H 1 + +/* Define to 1 if you have the header file. */ +#cmakedefine HAVE_SYS_SELECT_H 1 + +/* Define to 1 if you have the header file. */ +#cmakedefine HAVE_SYS_SOCKET_H 1 + +/* Define to 1 if you have the header file. */ +#cmakedefine HAVE_SYS_STAT_H 1 + +/* Define to 1 if you have the header file. */ +#cmakedefine HAVE_SYS_TIMEB_H 1 + +/* Define to 1 if you have the header file. */ +#cmakedefine HAVE_SYS_TIME_H 1 + +/* Define to 1 if you have the header file. 
*/ +#cmakedefine HAVE_SYS_TYPES_H 1 + +/* Define to 1 if you have the `time' function. */ +#cmakedefine HAVE_TIME 1 + +/* Define to 1 if you have the header file. */ +#cmakedefine HAVE_TIME_H 1 + +/* Define to 1 if you have the header file. */ +#cmakedefine HAVE_UNISTD_H 1 + +/* Whether va_copy() is available */ +#cmakedefine HAVE_VA_COPY 1 + +/* Define to 1 if you have the `vfprintf' function. */ +#cmakedefine HAVE_VFPRINTF 1 + +/* Define to 1 if you have the `vsnprintf' function. */ +#cmakedefine HAVE_VSNPRINTF 1 + +/* Define to 1 if you have the `vsprintf' function. */ +#cmakedefine HAVE_VSPRINTF 1 + +/* Define to 1 if you have the header file. */ +#cmakedefine HAVE_ZLIB_H 1 + +/* Whether __va_copy() is available */ +#cmakedefine HAVE___VA_COPY 1 + +/* Define as const if the declaration of iconv() needs const. */ +#define ICONV_CONST @ICONV_CONST@ + +/* Define to the sub-directory where libtool stores uninstalled libraries. */ +#cmakedefine LT_OBJDIR "@LT_OBJDIR@" + +/* Name of package */ +#define PACKAGE "@PACKAGE@" + +/* Define to the address where bug reports for this package should be sent. */ +#define PACKAGE_BUGREPORT "@PACKAGE_BUGREPORT@" + +/* Define to the full name of this package. */ +#define PACKAGE_NAME "@PACKAGE_NAME@" + +/* Define to the full name and version of this package. */ +#define PACKAGE_STRING "@PACKAGE_STRING@" + +/* Define to the one symbol short name of this package. */ +#define PACKAGE_TARNAME "@PACKAGE_TARNAME@" + +/* Define to the home page for this package. */ +#define PACKAGE_URL "@PACKAGE_URL@" + +/* Define to the version of this package. */ +#define PACKAGE_VERSION "@PACKAGE_VERSION@" + +/* Type cast for the send() function 2nd arg */ +#cmakedefine SEND_ARG2_CAST @SEND_ARG2_CAST@ + +/* Define to 1 if you have the ANSI C header files. 
*/ +#cmakedefine STDC_HEADERS 1 + +/* Support for IPv6 */ +#cmakedefine SUPPORT_IP6 1 + +/* Define if va_list is an array type */ +#cmakedefine VA_LIST_IS_ARRAY 1 + +/* Version number of package */ +#cmakedefine VERSION "@VERSION@" + +/* Determine what socket length (socklen_t) data type is */ +#cmakedefine XML_SOCKLEN_T @XML_SOCKLEN_T@ + +/* Define for Solaris 2.5.1 so the uint32_t typedef from , + , or is not used. If the typedef were allowed, the + #define below would cause a syntax error. */ +#cmakedefine _UINT32_T @_UINT32_T@ + +/* ss_family is not defined here, use __ss_family instead */ +#cmakedefine ss_family @ss_family@ + +/* Define to the type of an unsigned integer type of width exactly 32 bits if + such a type exists and the standard includes do not define it. */ +#cmakedefine uint32_t @uint32_t@ diff --git a/third_party/libxml/src/config.h.in b/third_party/libxml/src/config.h.in index a751769cdba8..a55d4e185723 100644 --- a/third_party/libxml/src/config.h.in +++ b/third_party/libxml/src/config.h.in @@ -1,5 +1,8 @@ /* config.h.in. Generated from configure.ac by autoheader. */ +/* A form that will not confuse apibuild.py */ +#undef ATTRIBUTE_DESTRUCTOR + /* Type cast for the gethostbyname() argument */ #undef GETHOSTBYNAME_ARG_CAST @@ -9,6 +12,9 @@ /* Define to 1 if you have the header file. */ #undef HAVE_ARPA_NAMESER_H +/* Define if __attribute__((destructor)) is accepted */ +#undef HAVE_ATTRIBUTE_DESTRUCTOR + /* Whether struct sockaddr::__ss_family exists */ #undef HAVE_BROKEN_SS_FAMILY @@ -84,9 +90,6 @@ /* Define to 1 if you have the header file. */ #undef HAVE_MATH_H -/* Define to 1 if you have the header file. */ -#undef HAVE_MEMORY_H - /* Define to 1 if you have the `mmap' function. */ #undef HAVE_MMAP @@ -158,6 +161,9 @@ /* Define to 1 if you have the header file. */ #undef HAVE_STDINT_H +/* Define to 1 if you have the header file. */ +#undef HAVE_STDIO_H + /* Define to 1 if you have the header file. 
*/ #undef HAVE_STDLIB_H @@ -256,7 +262,9 @@ /* Type cast for the send() function 2nd arg */ #undef SEND_ARG2_CAST -/* Define to 1 if you have the ANSI C header files. */ +/* Define to 1 if all of the C90 standard headers exist (not just the ones + required in a freestanding environment). This macro is provided for + backward compatibility; new code need not use it. */ #undef STDC_HEADERS /* Support for IPv6 */ diff --git a/third_party/libxml/src/configure.ac b/third_party/libxml/src/configure.ac index 09418af2398d..5b161a5450e8 100644 --- a/third_party/libxml/src/configure.ac +++ b/third_party/libxml/src/configure.ac @@ -1,15 +1,20 @@ dnl Process this file with autoconf to produce a configure script. AC_PREREQ([2.63]) -AC_INIT + +m4_define([MAJOR_VERSION], 2) +m4_define([MINOR_VERSION], 9) +m4_define([MICRO_VERSION], 12) + +AC_INIT([libxml2],[MAJOR_VERSION.MINOR_VERSION.MICRO_VERSION]) AC_CONFIG_SRCDIR([entities.c]) AC_CONFIG_HEADERS([config.h]) AM_MAINTAINER_MODE([enable]) AC_CONFIG_MACRO_DIR([m4]) AC_CANONICAL_HOST -LIBXML_MAJOR_VERSION=2 -LIBXML_MINOR_VERSION=9 -LIBXML_MICRO_VERSION=10 +LIBXML_MAJOR_VERSION=MAJOR_VERSION +LIBXML_MINOR_VERSION=MINOR_VERSION +LIBXML_MICRO_VERSION=MICRO_VERSION LIBXML_MICRO_VERSION_SUFFIX= LIBXML_VERSION=$LIBXML_MAJOR_VERSION.$LIBXML_MINOR_VERSION.$LIBXML_MICRO_VERSION$LIBXML_MICRO_VERSION_SUFFIX LIBXML_VERSION_INFO=`expr $LIBXML_MAJOR_VERSION + $LIBXML_MINOR_VERSION`:$LIBXML_MICRO_VERSION:$LIBXML_MINOR_VERSION @@ -50,7 +55,7 @@ AC_SUBST(LIBXML_VERSION_EXTRA) VERSION=${LIBXML_VERSION} -AM_INIT_AUTOMAKE(libxml2, $VERSION) +AM_INIT_AUTOMAKE([foreign]) # Support silent build rules, requires at least automake-1.11. 
Disable # by either passing --disable-silent-rules to configure or passing V=1 @@ -641,6 +646,17 @@ else [Type cast for the send() function 2nd arg]) fi +dnl Checking whether __attribute__((destructor)) is accepted by the compiler +AC_MSG_CHECKING([whether __attribute__((destructor)) is accepted]) +AC_TRY_COMPILE2([ +void __attribute__((destructor)) +f(void) {}], [], [ + AC_MSG_RESULT(yes) + AC_DEFINE([HAVE_ATTRIBUTE_DESTRUCTOR], [1],[Define if __attribute__((destructor)) is accepted]) + AC_DEFINE([ATTRIBUTE_DESTRUCTOR], [__attribute__((destructor))],[A form that will not confuse apibuild.py])],[ + AC_MSG_RESULT(no)]) + + dnl ***********************Checking for availability of IPv6******************* AC_MSG_CHECKING([whether to enable IPv6]) @@ -1503,8 +1519,8 @@ else AC_CHECK_HEADER(unicode/ucnv.h, AC_MSG_CHECKING(for icu) - AC_TRY_LINK([#include ],[ - UConverter *utf = ucnv_open("UTF-8", NULL);],[ + AC_LINK_IFELSE([AC_LANG_PROGRAM([[#include ]], [[ + UConverter *utf = ucnv_open("UTF-8", NULL);]])],[ AC_MSG_RESULT(yes) have_libicu=yes],[ AC_MSG_RESULT(no) @@ -1515,8 +1531,8 @@ else LDFLAGS="${LDFLAGS} ${ICU_LIBS}" LIBS="${LIBS} -licucore" - AC_TRY_LINK([#include ],[ - UConverter *utf = ucnv_open("UTF-8", NULL);],[ + AC_LINK_IFELSE([AC_LANG_PROGRAM([[#include ]], [[ + UConverter *utf = ucnv_open("UTF-8", NULL);]])],[ AC_MSG_RESULT(yes) have_libicu=yes ICU_LIBS="${ICU_LIBS} -licucore" @@ -1704,7 +1720,7 @@ rm -f COPYING.LIB COPYING ln -s $srcdir/Copyright COPYING # keep on one line for cygwin c.f. 
#130896 -AC_CONFIG_FILES([libxml2.spec:libxml.spec.in Makefile include/Makefile include/libxml/Makefile doc/Makefile doc/examples/Makefile doc/devhelp/Makefile example/Makefile python/Makefile python/tests/Makefile xstc/Makefile include/libxml/xmlversion.h libxml-2.0.pc libxml-2.0-uninstalled.pc libxml2-config.cmake]) +AC_CONFIG_FILES([libxml2.spec:libxml.spec.in Makefile include/Makefile include/libxml/Makefile doc/Makefile doc/examples/Makefile doc/devhelp/Makefile example/Makefile fuzz/Makefile python/Makefile python/tests/Makefile xstc/Makefile include/libxml/xmlversion.h libxml-2.0.pc libxml-2.0-uninstalled.pc libxml2-config.cmake]) AC_CONFIG_FILES([python/setup.py], [chmod +x python/setup.py]) AC_CONFIG_FILES([xml2-config], [chmod +x xml2-config]) AC_OUTPUT diff --git a/third_party/libxml/src/encoding.c b/third_party/libxml/src/encoding.c index 83f64a4ec484..5e50c1537f00 100644 --- a/third_party/libxml/src/encoding.c +++ b/third_party/libxml/src/encoding.c @@ -170,7 +170,7 @@ closeIcuConverter(uconv_t *conv) * Returns 0 if success, or -1 otherwise * The value of @inlen after return is the number of octets consumed * if the return value is positive, else unpredictable. - * The value of @outlen after return is the number of octets consumed. + * The value of @outlen after return is the number of octets produced. */ static int asciiToUTF8(unsigned char* out, int *outlen, @@ -217,7 +217,7 @@ asciiToUTF8(unsigned char* out, int *outlen, * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise * The value of @inlen after return is the number of octets consumed * if the return value is positive, else unpredictable. - * The value of @outlen after return is the number of octets consumed. + * The value of @outlen after return is the number of octets produced. 
*/ static int UTF8Toascii(unsigned char* out, int *outlen, @@ -301,7 +301,7 @@ UTF8Toascii(unsigned char* out, int *outlen, * Returns the number of bytes written if success, or -1 otherwise * The value of @inlen after return is the number of octets consumed * if the return value is positive, else unpredictable. - * The value of @outlen after return is the number of octets consumed. + * The value of @outlen after return is the number of octets produced. */ int isolat1ToUTF8(unsigned char* out, int *outlen, @@ -373,6 +373,11 @@ UTF8ToUTF8(unsigned char* out, int *outlen, if (len < 0) return(-1); + /* + * FIXME: Conversion functions must assure valid UTF-8, so we have + * to check for UTF-8 validity. Preferably, this converter shouldn't + * be used at all. + */ memcpy(out, inb, len); *outlen = len; @@ -396,7 +401,7 @@ UTF8ToUTF8(unsigned char* out, int *outlen, or -1 otherwise * The value of @inlen after return is the number of octets consumed * if the return value is positive, else unpredictable. - * The value of @outlen after return is the number of octets consumed. + * The value of @outlen after return is the number of octets produced. 
*/ int UTF8Toisolat1(unsigned char* out, int *outlen, @@ -496,13 +501,18 @@ UTF16LEToUTF8(unsigned char* out, int *outlen, { unsigned char* outstart = out; const unsigned char* processed = inb; - unsigned char* outend = out + *outlen; + unsigned char* outend; unsigned short* in = (unsigned short*) inb; unsigned short* inend; unsigned int c, d, inlen; unsigned char *tmp; int bits; + if (*outlen == 0) { + *inlenb = 0; + return(0); + } + outend = out + *outlen; if ((*inlenb % 2) == 1) (*inlenb)--; inlen = *inlenb / 2; @@ -1483,16 +1493,25 @@ xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) { if ((handler == NULL) || (handlers == NULL)) { xmlEncodingErr(XML_I18N_NO_HANDLER, "xmlRegisterCharEncodingHandler: NULL handler !\n", NULL); - return; + goto free_handler; } if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) { xmlEncodingErr(XML_I18N_EXCESS_HANDLER, "xmlRegisterCharEncodingHandler: Too many handler registered, see %s\n", "MAX_ENCODING_HANDLERS"); - return; + goto free_handler; } handlers[nbCharEncodingHandler++] = handler; + return; + +free_handler: + if (handler != NULL) { + if (handler->name != NULL) { + xmlFree(handler->name); + } + xmlFree(handler); + } } /** @@ -1784,7 +1803,7 @@ xmlFindCharEncodingHandler(const char *name) { * @cd: iconv converter data structure * @out: a pointer to an array of bytes to store the result * @outlen: the length of @out - * @in: a pointer to an array of ISO Latin 1 chars + * @in: a pointer to an array of input bytes * @inlen: the length of @in * * Returns 0 if success, or @@ -1795,7 +1814,7 @@ xmlFindCharEncodingHandler(const char *name) { * * The value of @inlen after return is the number of octets consumed * as the return value is positive, else unpredictable. - * The value of @outlen after return is the number of octets consumed. + * The value of @outlen after return is the number of octets produced. 
*/ static int xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen, @@ -1851,7 +1870,7 @@ xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen, * @toUnicode : non-zero if toUnicode. 0 otherwise. * @out: a pointer to an array of bytes to store the result * @outlen: the length of @out - * @in: a pointer to an array of ISO Latin 1 chars + * @in: a pointer to an array of input bytes * @inlen: the length of @in * @flush: if true, indicates end of input * @@ -1863,7 +1882,7 @@ xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen, * * The value of @inlen after return is the number of octets consumed * as the return value is positive, else unpredictable. - * The value of @outlen after return is the number of octets consumed. + * The value of @outlen after return is the number of octets produced. */ static int xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen, @@ -1912,6 +1931,25 @@ xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen, * * ************************************************************************/ +/** + * xmlEncInputChunk: + * @handler: encoding handler + * @out: a pointer to an array of bytes to store the result + * @outlen: the length of @out + * @in: a pointer to an array of input bytes + * @inlen: the length of @in + * @flush: flush (ICU-related) + * + * Returns 0 if success, or + * -1 by lack of space, or + * -2 if the transcoding fails (for *in is not valid utf8 string or + * the result of transformation can't fit into the encoding we want), or + * -3 if there the last byte can't form a single output char. + * + * The value of @inlen after return is the number of octets consumed + * as the return value is 0, else unpredictable. + * The value of @outlen after return is the number of octets produced. 
+ */ static int xmlEncInputChunk(xmlCharEncodingHandler *handler, unsigned char *out, int *outlen, const unsigned char *in, int *inlen, int flush) { @@ -1920,6 +1958,8 @@ xmlEncInputChunk(xmlCharEncodingHandler *handler, unsigned char *out, if (handler->input != NULL) { ret = handler->input(out, outlen, in, inlen); + if (ret > 0) + ret = 0; } #ifdef LIBXML_ICONV_ENABLED else if (handler->iconv_in != NULL) { @@ -1941,7 +1981,25 @@ xmlEncInputChunk(xmlCharEncodingHandler *handler, unsigned char *out, return(ret); } -/* Returns -4 if no output function was found. */ +/** + * xmlEncOutputChunk: + * @handler: encoding handler + * @out: a pointer to an array of bytes to store the result + * @outlen: the length of @out + * @in: a pointer to an array of input bytes + * @inlen: the length of @in + * + * Returns 0 if success, or + * -1 by lack of space, or + * -2 if the transcoding fails (for *in is not valid utf8 string or + * the result of transformation can't fit into the encoding we want), or + * -3 if there the last byte can't form a single output char. + * -4 if no output function was found. + * + * The value of @inlen after return is the number of octets consumed + * as the return value is 0, else unpredictable. + * The value of @outlen after return is the number of octets produced. 
+ */ static int xmlEncOutputChunk(xmlCharEncodingHandler *handler, unsigned char *out, int *outlen, const unsigned char *in, int *inlen) { @@ -1949,6 +2007,8 @@ xmlEncOutputChunk(xmlCharEncodingHandler *handler, unsigned char *out, if (handler->output != NULL) { ret = handler->output(out, outlen, in, inlen); + if (ret > 0) + ret = 0; } #ifdef LIBXML_ICONV_ENABLED else if (handler->iconv_out != NULL) { @@ -1958,7 +2018,7 @@ xmlEncOutputChunk(xmlCharEncodingHandler *handler, unsigned char *out, #ifdef LIBXML_ICU_ENABLED else if (handler->uconv_out != NULL) { ret = xmlUconvWrapper(handler->uconv_out, 0, out, outlen, in, inlen, - true); + 1); } #endif /* LIBXML_ICU_ENABLED */ else { @@ -2054,7 +2114,7 @@ xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out, */ if (ret == -3) ret = 0; if (ret == -1) ret = 0; - return(ret); + return(written ? written : ret); } /** @@ -2184,7 +2244,7 @@ xmlCharEncFirstLineInput(xmlParserInputBufferPtr input, int len) */ if (ret == -3) ret = 0; if (ret == -1) ret = 0; - return(ret); + return(c_out ? c_out : ret); } /** @@ -2394,7 +2454,7 @@ xmlCharEncOutput(xmlOutputBufferPtr output, int init) { int ret; size_t written; - size_t writtentot = 0; + int writtentot = 0; size_t toconv; int c_in; int c_out; @@ -2427,7 +2487,7 @@ xmlCharEncOutput(xmlOutputBufferPtr output, int init) xmlGenericError(xmlGenericErrorContext, "initialized encoder\n"); #endif - return(0); + return(c_out); } /* @@ -2540,7 +2600,7 @@ xmlCharEncOutput(xmlOutputBufferPtr output, int init) goto retry; } } - return(ret); + return(writtentot ? 
writtentot : ret); } #endif @@ -2569,7 +2629,6 @@ xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out, int written; int writtentot = 0; int toconv; - int output = 0; if (handler == NULL) return(-1); if (out == NULL) return(-1); @@ -2622,8 +2681,6 @@ xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out, ret = -3; } - if (ret >= 0) output += ret; - /* * Attempt to handle error cases */ @@ -2705,7 +2762,7 @@ xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out, goto retry; } } - return(ret); + return(writtentot ? writtentot : ret); } /** diff --git a/third_party/libxml/src/entities.c b/third_party/libxml/src/entities.c index 4b41fe970da3..8307471246d7 100644 --- a/third_party/libxml/src/entities.c +++ b/third_party/libxml/src/entities.c @@ -211,7 +211,7 @@ xmlAddEntity(xmlDtdPtr dtd, const xmlChar *name, int type, const xmlChar *content) { xmlDictPtr dict = NULL; xmlEntitiesTablePtr table = NULL; - xmlEntityPtr ret; + xmlEntityPtr ret, predef; if (name == NULL) return(NULL); @@ -224,6 +224,44 @@ xmlAddEntity(xmlDtdPtr dtd, const xmlChar *name, int type, case XML_INTERNAL_GENERAL_ENTITY: case XML_EXTERNAL_GENERAL_PARSED_ENTITY: case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY: + predef = xmlGetPredefinedEntity(name); + if (predef != NULL) { + int valid = 0; + + /* 4.6 Predefined Entities */ + if ((type == XML_INTERNAL_GENERAL_ENTITY) && + (content != NULL)) { + int c = predef->content[0]; + + if (((content[0] == c) && (content[1] == 0)) && + ((c == '>') || (c == '\'') || (c == '"'))) { + valid = 1; + } else if ((content[0] == '&') && (content[1] == '#')) { + if (content[2] == 'x') { + xmlChar *hex = BAD_CAST "0123456789ABCDEF"; + xmlChar ref[] = "00;"; + + ref[0] = hex[c / 16 % 16]; + ref[1] = hex[c % 16]; + if (xmlStrcasecmp(&content[3], ref) == 0) + valid = 1; + } else { + xmlChar ref[] = "00;"; + + ref[0] = '0' + c / 10 % 10; + ref[1] = '0' + c % 10; + if (xmlStrEqual(&content[2], ref)) + valid = 1; + } + } + } + if (!valid) { 
+ xmlEntitiesErr(XML_ERR_ENTITY_PROCESSING, + "xmlAddEntity: invalid redeclaration of predefined" + " entity"); + return(NULL); + } + } if (dtd->entities == NULL) dtd->entities = xmlHashCreateDict(0, dict); table = dtd->entities; diff --git a/third_party/libxml/src/error.c b/third_party/libxml/src/error.c index 86ea487ea179..ac2aa73930ae 100644 --- a/third_party/libxml/src/error.c +++ b/third_party/libxml/src/error.c @@ -73,6 +73,7 @@ xmlGenericErrorDefaultFunc(void *ctx ATTRIBUTE_UNUSED, const char *msg, ...) { if (xmlGenericErrorContext == NULL) xmlGenericErrorContext = (void *) stderr; + #ifndef STARBOARD va_start(args, msg); vfprintf((FILE *)xmlGenericErrorContext, msg, args); @@ -562,6 +563,7 @@ __xmlRaiseError(xmlStructuredErrorFunc schannel, * of the usual "base" (doc->URL) for the node (bug 152623). */ xmlNodePtr prev = baseptr; + char *href = NULL; int inclcount = 0; while (prev != NULL) { if (prev->prev == NULL) @@ -569,21 +571,20 @@ __xmlRaiseError(xmlStructuredErrorFunc schannel, else { prev = prev->prev; if (prev->type == XML_XINCLUDE_START) { - if (--inclcount < 0) - break; + if (inclcount > 0) { + --inclcount; + } else { + href = (char *) xmlGetProp(prev, BAD_CAST "href"); + if (href != NULL) + break; + } } else if (prev->type == XML_XINCLUDE_END) inclcount++; } } - if (prev != NULL) { - if (prev->type == XML_XINCLUDE_START) { - prev->type = XML_ELEMENT_NODE; - to->file = (char *) xmlGetProp(prev, BAD_CAST "href"); - prev->type = XML_XINCLUDE_START; - } else { - to->file = (char *) xmlGetProp(prev, BAD_CAST "href"); - } - } else + if (href != NULL) + to->file = href; + else #endif to->file = (char *) xmlStrdup(baseptr->doc->URL); if ((to->file == NULL) && (node != NULL) && (node->doc != NULL)) { diff --git a/third_party/libxml/src/fuzz/Makefile.am b/third_party/libxml/src/fuzz/Makefile.am new file mode 100644 index 000000000000..6f487137a1d0 --- /dev/null +++ b/third_party/libxml/src/fuzz/Makefile.am @@ -0,0 +1,140 @@ +AUTOMAKE_OPTIONS = -Wno-syntax 
+EXTRA_PROGRAMS = genSeed html regexp schema uri xml xpath +check_PROGRAMS = testFuzzer +EXTRA_DIST = html.dict regexp.dict schema.dict xml.dict xpath.dict \ + seed/uri seed/regexp fuzz.h +CLEANFILES = $(EXTRA_PROGRAMS) +AM_CPPFLAGS = -I$(top_srcdir)/include +DEPENDENCIES = $(top_builddir)/libxml2.la +LDADD = $(STATIC_BINARIES) $(top_builddir)/libxml2.la $(THREAD_LIBS) $(Z_LIBS) $(LZMA_LIBS) $(ICONV_LIBS) $(M_LIBS) $(WIN32_EXTRA_LIBADD) + +XML_MAX_LEN = 80000 +# Single quotes to avoid wildcard expansion by the shell +XML_SEED_CORPUS_SRC = \ + '$(top_srcdir)/test/*' \ + '$(top_srcdir)/test/errors/*.xml' \ + '$(top_srcdir)/test/errors10/*.xml' \ + '$(top_srcdir)/test/namespaces/*' \ + '$(top_srcdir)/test/valid/*.xml' \ + '$(top_srcdir)/test/VC/*' \ + '$(top_srcdir)/test/VCM/*' \ + '$(top_srcdir)/test/XInclude/docs/*' \ + '$(top_srcdir)/test/xmlid/*' + +testFuzzer_SOURCES = testFuzzer.c fuzz.c + +.PHONY: tests corpus clean-corpus + +corpus: seed/html.stamp seed/schema.stamp seed/xml.stamp seed/xpath.stamp + +tests: testFuzzer$(EXEEXT) corpus + @echo "## Running fuzzer tests" + @./testFuzzer$(EXEEXT) + +clean-corpus: + rm -rf seed/html.stamp seed/html + rm -rf seed/schema.stamp seed/schema + rm -rf seed/xml.stamp seed/xml + rm -rf seed/xpath.stamp seed/xpath + +# Seed corpus + +genSeed_SOURCES = genSeed.c fuzz.c + +# XML fuzzer + +seed/xml.stamp: genSeed$(EXEEXT) + @mkdir -p seed/xml + @./genSeed$(EXEEXT) xml $(XML_SEED_CORPUS_SRC) + @touch seed/xml.stamp + +xml_SOURCES = xml.c fuzz.c +xml_LDFLAGS = -fsanitize=fuzzer + +fuzz-xml: xml$(EXEEXT) seed/xml.stamp + @mkdir -p corpus/xml + ./xml$(EXEEXT) \ + -dict=xml.dict \ + -max_len=$(XML_MAX_LEN) \ + -timeout=20 \ + corpus/xml seed/xml + +# HTML fuzzer + +seed/html.stamp: genSeed$(EXEEXT) + @mkdir -p seed/html + @./genSeed$(EXEEXT) html '$(top_srcdir)/test/HTML/*' + @touch seed/html.stamp + +html_SOURCES = html.c fuzz.c +html_LDFLAGS = -fsanitize=fuzzer + +fuzz-html: html$(EXEEXT) seed/html.stamp + @mkdir -p corpus/html + 
./html$(EXEEXT) \ + -dict=html.dict \ + -max_len=1000000 \ + -timeout=10 \ + corpus/html seed/html + +# Regexp fuzzer + +regexp_SOURCES = regexp.c fuzz.c +regexp_LDFLAGS = -fsanitize=fuzzer + +fuzz-regexp: regexp$(EXEEXT) + @mkdir -p corpus/regexp + ./regexp$(EXEEXT) \ + -dict=regexp.dict \ + -max_len=200 \ + -timeout=5 \ + corpus/regexp $(srcdir)/seed/regexp + +# URI fuzzer + +uri_SOURCES = uri.c fuzz.c +uri_LDFLAGS = -fsanitize=fuzzer + +fuzz-uri: uri$(EXEEXT) + @mkdir -p corpus/uri + ./uri$(EXEEXT) \ + -max_len=10000 \ + -timeout=2 \ + corpus/uri $(srcdir)/seed/uri + +# XML Schema fuzzer + +seed/schema.stamp: genSeed$(EXEEXT) + @mkdir -p seed/schema + @./genSeed$(EXEEXT) schema '$(top_srcdir)/test/schemas/*.xsd' + @touch seed/schema.stamp + +schema_SOURCES = schema.c fuzz.c +schema_LDFLAGS = -fsanitize=fuzzer + +fuzz-schema: schema$(EXEEXT) seed/schema.stamp + @mkdir -p corpus/schema + ./schema$(EXEEXT) \ + -dict=schema.dict \ + -max_len=$(XML_MAX_LEN) \ + -timeout=20 \ + corpus/schema seed/schema + +# XPath fuzzer + +seed/xpath.stamp: genSeed$(EXEEXT) + @mkdir -p seed/xpath + @./genSeed$(EXEEXT) xpath "$(top_builddir)/test/XPath" + @touch seed/xpath.stamp + +xpath_SOURCES = xpath.c fuzz.c +xpath_LDFLAGS = -fsanitize=fuzzer + +fuzz-xpath: xpath$(EXEEXT) seed/xpath.stamp + @mkdir -p corpus/xpath + ./xpath$(EXEEXT) \ + -dict=xpath.dict \ + -max_len=10000 \ + -timeout=20 \ + corpus/xpath seed/xpath + diff --git a/third_party/libxml/src/fuzz/Makefile.in b/third_party/libxml/src/fuzz/Makefile.in new file mode 100644 index 000000000000..47f1edb3d529 --- /dev/null +++ b/third_party/libxml/src/fuzz/Makefile.in @@ -0,0 +1,973 @@ +# Makefile.in generated by automake 1.16.3 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2020 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. 
+ +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) ;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = 
$(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +EXTRA_PROGRAMS = genSeed$(EXEEXT) html$(EXEEXT) regexp$(EXEEXT) \ + schema$(EXEEXT) uri$(EXEEXT) xml$(EXEEXT) xpath$(EXEEXT) +check_PROGRAMS = testFuzzer$(EXEEXT) +subdir = fuzz +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/acinclude.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +am_genSeed_OBJECTS = genSeed.$(OBJEXT) fuzz.$(OBJEXT) +genSeed_OBJECTS = $(am_genSeed_OBJECTS) +genSeed_LDADD = $(LDADD) +am__DEPENDENCIES_1 = +genSeed_DEPENDENCIES = $(am__DEPENDENCIES_1) \ + $(top_builddir)/libxml2.la $(am__DEPENDENCIES_1) \ + $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \ + $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \ + $(am__DEPENDENCIES_1) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +am_html_OBJECTS = html.$(OBJEXT) fuzz.$(OBJEXT) +html_OBJECTS = $(am_html_OBJECTS) +html_LDADD = $(LDADD) +html_DEPENDENCIES = $(am__DEPENDENCIES_1) $(top_builddir)/libxml2.la \ + $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \ + $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \ + $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) +html_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + 
$(html_LDFLAGS) $(LDFLAGS) -o $@ +am_regexp_OBJECTS = regexp.$(OBJEXT) fuzz.$(OBJEXT) +regexp_OBJECTS = $(am_regexp_OBJECTS) +regexp_LDADD = $(LDADD) +regexp_DEPENDENCIES = $(am__DEPENDENCIES_1) $(top_builddir)/libxml2.la \ + $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \ + $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \ + $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) +regexp_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(regexp_LDFLAGS) $(LDFLAGS) -o $@ +am_schema_OBJECTS = schema.$(OBJEXT) fuzz.$(OBJEXT) +schema_OBJECTS = $(am_schema_OBJECTS) +schema_LDADD = $(LDADD) +schema_DEPENDENCIES = $(am__DEPENDENCIES_1) $(top_builddir)/libxml2.la \ + $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \ + $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \ + $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) +schema_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(schema_LDFLAGS) $(LDFLAGS) -o $@ +am_testFuzzer_OBJECTS = testFuzzer.$(OBJEXT) fuzz.$(OBJEXT) +testFuzzer_OBJECTS = $(am_testFuzzer_OBJECTS) +testFuzzer_LDADD = $(LDADD) +testFuzzer_DEPENDENCIES = $(am__DEPENDENCIES_1) \ + $(top_builddir)/libxml2.la $(am__DEPENDENCIES_1) \ + $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \ + $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \ + $(am__DEPENDENCIES_1) +am_uri_OBJECTS = uri.$(OBJEXT) fuzz.$(OBJEXT) +uri_OBJECTS = $(am_uri_OBJECTS) +uri_LDADD = $(LDADD) +uri_DEPENDENCIES = $(am__DEPENDENCIES_1) $(top_builddir)/libxml2.la \ + $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \ + $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \ + $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) +uri_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(uri_LDFLAGS) $(LDFLAGS) -o $@ +am_xml_OBJECTS = xml.$(OBJEXT) fuzz.$(OBJEXT) +xml_OBJECTS = $(am_xml_OBJECTS) +xml_LDADD = $(LDADD) +xml_DEPENDENCIES = 
$(am__DEPENDENCIES_1) $(top_builddir)/libxml2.la \ + $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \ + $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \ + $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) +xml_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(xml_LDFLAGS) $(LDFLAGS) -o $@ +am_xpath_OBJECTS = xpath.$(OBJEXT) fuzz.$(OBJEXT) +xpath_OBJECTS = $(am_xpath_OBJECTS) +xpath_LDADD = $(LDADD) +xpath_DEPENDENCIES = $(am__DEPENDENCIES_1) $(top_builddir)/libxml2.la \ + $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \ + $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \ + $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) +xpath_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(xpath_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__maybe_remake_depfiles = depfiles +am__depfiles_remade = ./$(DEPDIR)/fuzz.Po ./$(DEPDIR)/genSeed.Po \ + ./$(DEPDIR)/html.Po ./$(DEPDIR)/regexp.Po \ + ./$(DEPDIR)/schema.Po ./$(DEPDIR)/testFuzzer.Po \ + ./$(DEPDIR)/uri.Po ./$(DEPDIR)/xml.Po ./$(DEPDIR)/xpath.Po +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = 
$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(genSeed_SOURCES) $(html_SOURCES) $(regexp_SOURCES) \ + $(schema_SOURCES) $(testFuzzer_SOURCES) $(uri_SOURCES) \ + $(xml_SOURCES) $(xpath_SOURCES) +DIST_SOURCES = $(genSeed_SOURCES) $(html_SOURCES) $(regexp_SOURCES) \ + $(schema_SOURCES) $(testFuzzer_SOURCES) $(uri_SOURCES) \ + $(xml_SOURCES) $(xpath_SOURCES) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. 
+am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +am__DIST_COMMON = $(srcdir)/Makefile.in $(top_srcdir)/depcomp README +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BASE_THREAD_LIBS = @BASE_THREAD_LIBS@ +C14N_OBJ = @C14N_OBJ@ +CATALOG_OBJ = @CATALOG_OBJ@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CYGPATH_W = @CYGPATH_W@ +CYGWIN_EXTRA_LDFLAGS = @CYGWIN_EXTRA_LDFLAGS@ +CYGWIN_EXTRA_PYTHON_LIBADD = @CYGWIN_EXTRA_PYTHON_LIBADD@ +DEBUG_OBJ = @DEBUG_OBJ@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DOCB_OBJ = @DOCB_OBJ@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +EXTRA_CFLAGS = @EXTRA_CFLAGS@ +FGREP = @FGREP@ +FTP_OBJ = @FTP_OBJ@ +GREP = @GREP@ +HAVE_ISINF = @HAVE_ISINF@ +HAVE_ISNAN = @HAVE_ISNAN@ +HTML_DIR = @HTML_DIR@ +HTML_OBJ = @HTML_OBJ@ +HTTP_OBJ = @HTTP_OBJ@ +ICONV_LIBS = @ICONV_LIBS@ +ICU_CFLAGS = @ICU_CFLAGS@ +ICU_LIBS = @ICU_LIBS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIBXML_MAJOR_VERSION = @LIBXML_MAJOR_VERSION@ +LIBXML_MICRO_VERSION = @LIBXML_MICRO_VERSION@ +LIBXML_MINOR_VERSION = @LIBXML_MINOR_VERSION@ +LIBXML_VERSION = @LIBXML_VERSION@ +LIBXML_VERSION_EXTRA = @LIBXML_VERSION_EXTRA@ +LIBXML_VERSION_INFO = @LIBXML_VERSION_INFO@ +LIBXML_VERSION_NUMBER = @LIBXML_VERSION_NUMBER@ +LIPO = @LIPO@ +LN_S = @LN_S@ 
+LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +LZMA_CFLAGS = @LZMA_CFLAGS@ +LZMA_LIBS = @LZMA_LIBS@ +MAINT = @MAINT@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MODULE_EXTENSION = @MODULE_EXTENSION@ +MODULE_PLATFORM_LIBS = @MODULE_PLATFORM_LIBS@ +MV = @MV@ +M_LIBS = @M_LIBS@ +NM = @NM@ +NMEDIT = @NMEDIT@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PERL = @PERL@ +PKG_CONFIG = @PKG_CONFIG@ +PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@ +PKG_CONFIG_PATH = @PKG_CONFIG_PATH@ +PYTHON = @PYTHON@ +PYTHON_INCLUDES = @PYTHON_INCLUDES@ +PYTHON_LIBS = @PYTHON_LIBS@ +PYTHON_SITE_PACKAGES = @PYTHON_SITE_PACKAGES@ +PYTHON_SUBDIR = @PYTHON_SUBDIR@ +PYTHON_TESTS = @PYTHON_TESTS@ +PYTHON_VERSION = @PYTHON_VERSION@ +RANLIB = @RANLIB@ +RDL_LIBS = @RDL_LIBS@ +READER_TEST = @READER_TEST@ +RELDATE = @RELDATE@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +STATIC_BINARIES = @STATIC_BINARIES@ +STRIP = @STRIP@ +TAR = @TAR@ +TEST_C14N = @TEST_C14N@ +TEST_CATALOG = @TEST_CATALOG@ +TEST_DEBUG = @TEST_DEBUG@ +TEST_HTML = @TEST_HTML@ +TEST_MODULES = @TEST_MODULES@ +TEST_PATTERN = @TEST_PATTERN@ +TEST_PHTML = @TEST_PHTML@ +TEST_PUSH = @TEST_PUSH@ +TEST_REGEXPS = @TEST_REGEXPS@ +TEST_SAX = @TEST_SAX@ +TEST_SCHEMAS = @TEST_SCHEMAS@ +TEST_SCHEMATRON = @TEST_SCHEMATRON@ +TEST_THREADS = @TEST_THREADS@ +TEST_VALID = @TEST_VALID@ +TEST_VTIME = @TEST_VTIME@ +TEST_XINCLUDE = @TEST_XINCLUDE@ +TEST_XPATH = @TEST_XPATH@ +TEST_XPTR = @TEST_XPTR@ +THREAD_CFLAGS = @THREAD_CFLAGS@ +THREAD_LIBS = @THREAD_LIBS@ +VERSION = @VERSION@ +VERSION_SCRIPT_FLAGS = @VERSION_SCRIPT_FLAGS@ +WGET = @WGET@ +WIN32_EXTRA_LDFLAGS = @WIN32_EXTRA_LDFLAGS@ +WIN32_EXTRA_LIBADD = 
@WIN32_EXTRA_LIBADD@ +WIN32_EXTRA_PYTHON_LIBADD = @WIN32_EXTRA_PYTHON_LIBADD@ +WITH_C14N = @WITH_C14N@ +WITH_CATALOG = @WITH_CATALOG@ +WITH_DEBUG = @WITH_DEBUG@ +WITH_DOCB = @WITH_DOCB@ +WITH_FTP = @WITH_FTP@ +WITH_HTML = @WITH_HTML@ +WITH_HTTP = @WITH_HTTP@ +WITH_ICONV = @WITH_ICONV@ +WITH_ICU = @WITH_ICU@ +WITH_ISO8859X = @WITH_ISO8859X@ +WITH_LEGACY = @WITH_LEGACY@ +WITH_LZMA = @WITH_LZMA@ +WITH_MEM_DEBUG = @WITH_MEM_DEBUG@ +WITH_MODULES = @WITH_MODULES@ +WITH_OUTPUT = @WITH_OUTPUT@ +WITH_PATTERN = @WITH_PATTERN@ +WITH_PUSH = @WITH_PUSH@ +WITH_READER = @WITH_READER@ +WITH_REGEXPS = @WITH_REGEXPS@ +WITH_RUN_DEBUG = @WITH_RUN_DEBUG@ +WITH_SAX1 = @WITH_SAX1@ +WITH_SCHEMAS = @WITH_SCHEMAS@ +WITH_SCHEMATRON = @WITH_SCHEMATRON@ +WITH_THREADS = @WITH_THREADS@ +WITH_THREAD_ALLOC = @WITH_THREAD_ALLOC@ +WITH_TREE = @WITH_TREE@ +WITH_TRIO = @WITH_TRIO@ +WITH_VALID = @WITH_VALID@ +WITH_WRITER = @WITH_WRITER@ +WITH_XINCLUDE = @WITH_XINCLUDE@ +WITH_XPATH = @WITH_XPATH@ +WITH_XPTR = @WITH_XPTR@ +WITH_ZLIB = @WITH_ZLIB@ +XINCLUDE_OBJ = @XINCLUDE_OBJ@ +XMLLINT = @XMLLINT@ +XML_CFLAGS = @XML_CFLAGS@ +XML_INCLUDEDIR = @XML_INCLUDEDIR@ +XML_LIBDIR = @XML_LIBDIR@ +XML_LIBS = @XML_LIBS@ +XML_LIBTOOLLIBS = @XML_LIBTOOLLIBS@ +XML_PRIVATE_LIBS = @XML_PRIVATE_LIBS@ +XPATH_OBJ = @XPATH_OBJ@ +XPTR_OBJ = @XPTR_OBJ@ +XSLTPROC = @XSLTPROC@ +Z_CFLAGS = @Z_CFLAGS@ +Z_LIBS = @Z_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = 
@exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +pythondir = @pythondir@ +runstatedir = @runstatedir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +AUTOMAKE_OPTIONS = -Wno-syntax +EXTRA_DIST = html.dict regexp.dict schema.dict xml.dict xpath.dict \ + seed/uri seed/regexp fuzz.h + +CLEANFILES = $(EXTRA_PROGRAMS) +AM_CPPFLAGS = -I$(top_srcdir)/include +DEPENDENCIES = $(top_builddir)/libxml2.la +LDADD = $(STATIC_BINARIES) $(top_builddir)/libxml2.la $(THREAD_LIBS) $(Z_LIBS) $(LZMA_LIBS) $(ICONV_LIBS) $(M_LIBS) $(WIN32_EXTRA_LIBADD) +XML_MAX_LEN = 80000 +# Single quotes to avoid wildcard expansion by the shell +XML_SEED_CORPUS_SRC = \ + '$(top_srcdir)/test/*' \ + '$(top_srcdir)/test/errors/*.xml' \ + '$(top_srcdir)/test/errors10/*.xml' \ + '$(top_srcdir)/test/namespaces/*' \ + '$(top_srcdir)/test/valid/*.xml' \ + '$(top_srcdir)/test/VC/*' \ + '$(top_srcdir)/test/VCM/*' \ + '$(top_srcdir)/test/XInclude/docs/*' \ + '$(top_srcdir)/test/xmlid/*' + +testFuzzer_SOURCES = testFuzzer.c fuzz.c + +# Seed corpus +genSeed_SOURCES = genSeed.c fuzz.c +xml_SOURCES = xml.c fuzz.c +xml_LDFLAGS = -fsanitize=fuzzer +html_SOURCES = html.c fuzz.c +html_LDFLAGS = -fsanitize=fuzzer + +# Regexp fuzzer +regexp_SOURCES = regexp.c fuzz.c +regexp_LDFLAGS = -fsanitize=fuzzer + +# URI fuzzer +uri_SOURCES = uri.c fuzz.c +uri_LDFLAGS = -fsanitize=fuzzer +schema_SOURCES 
= schema.c fuzz.c +schema_LDFLAGS = -fsanitize=fuzzer +xpath_SOURCES = xpath.c fuzz.c +xpath_LDFLAGS = -fsanitize=fuzzer +all: all-am + +.SUFFIXES: +.SUFFIXES: .c .lo .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign fuzz/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign fuzz/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +clean-checkPROGRAMS: + @list='$(check_PROGRAMS)'; test -n "$$list" || exit 0; \ + echo " rm -f" $$list; \ + rm -f $$list || exit $$?; \ + test -n "$(EXEEXT)" || exit 0; \ + list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ + echo " rm -f" $$list; \ + rm -f $$list + +genSeed$(EXEEXT): $(genSeed_OBJECTS) $(genSeed_DEPENDENCIES) $(EXTRA_genSeed_DEPENDENCIES) + @rm -f genSeed$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(genSeed_OBJECTS) $(genSeed_LDADD) $(LIBS) + +html$(EXEEXT): $(html_OBJECTS) $(html_DEPENDENCIES) $(EXTRA_html_DEPENDENCIES) + @rm 
-f html$(EXEEXT) + $(AM_V_CCLD)$(html_LINK) $(html_OBJECTS) $(html_LDADD) $(LIBS) + +regexp$(EXEEXT): $(regexp_OBJECTS) $(regexp_DEPENDENCIES) $(EXTRA_regexp_DEPENDENCIES) + @rm -f regexp$(EXEEXT) + $(AM_V_CCLD)$(regexp_LINK) $(regexp_OBJECTS) $(regexp_LDADD) $(LIBS) + +schema$(EXEEXT): $(schema_OBJECTS) $(schema_DEPENDENCIES) $(EXTRA_schema_DEPENDENCIES) + @rm -f schema$(EXEEXT) + $(AM_V_CCLD)$(schema_LINK) $(schema_OBJECTS) $(schema_LDADD) $(LIBS) + +testFuzzer$(EXEEXT): $(testFuzzer_OBJECTS) $(testFuzzer_DEPENDENCIES) $(EXTRA_testFuzzer_DEPENDENCIES) + @rm -f testFuzzer$(EXEEXT) + $(AM_V_CCLD)$(LINK) $(testFuzzer_OBJECTS) $(testFuzzer_LDADD) $(LIBS) + +uri$(EXEEXT): $(uri_OBJECTS) $(uri_DEPENDENCIES) $(EXTRA_uri_DEPENDENCIES) + @rm -f uri$(EXEEXT) + $(AM_V_CCLD)$(uri_LINK) $(uri_OBJECTS) $(uri_LDADD) $(LIBS) + +xml$(EXEEXT): $(xml_OBJECTS) $(xml_DEPENDENCIES) $(EXTRA_xml_DEPENDENCIES) + @rm -f xml$(EXEEXT) + $(AM_V_CCLD)$(xml_LINK) $(xml_OBJECTS) $(xml_LDADD) $(LIBS) + +xpath$(EXEEXT): $(xpath_OBJECTS) $(xpath_DEPENDENCIES) $(EXTRA_xpath_DEPENDENCIES) + @rm -f xpath$(EXEEXT) + $(AM_V_CCLD)$(xpath_LINK) $(xpath_OBJECTS) $(xpath_LDADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fuzz.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/genSeed.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/html.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/regexp.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/schema.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/testFuzzer.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/uri.Po@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/xml.Po@am__quote@ # am--include-marker 
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/xpath.Po@am__quote@ # am--include-marker + +$(am__depfiles_remade): + @$(MKDIR_P) $(@D) + @echo '# dummy' >$@-t && $(am__mv) $@-t $@ + +am--depfiles: $(am__depfiles_remade) + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-am +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; 
then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-am + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-am + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am + $(MAKE) $(AM_MAKEFLAGS) $(check_PROGRAMS) +check: check-am +all-am: Makefile +installdirs: +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+clean: clean-am + +clean-am: clean-checkPROGRAMS clean-generic clean-libtool \ + mostlyclean-am + +distclean: distclean-am + -rm -f ./$(DEPDIR)/fuzz.Po + -rm -f ./$(DEPDIR)/genSeed.Po + -rm -f ./$(DEPDIR)/html.Po + -rm -f ./$(DEPDIR)/regexp.Po + -rm -f ./$(DEPDIR)/schema.Po + -rm -f ./$(DEPDIR)/testFuzzer.Po + -rm -f ./$(DEPDIR)/uri.Po + -rm -f ./$(DEPDIR)/xml.Po + -rm -f ./$(DEPDIR)/xpath.Po + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html-am: + +info: info-am + +info-am: + +install-data-am: + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f ./$(DEPDIR)/fuzz.Po + -rm -f ./$(DEPDIR)/genSeed.Po + -rm -f ./$(DEPDIR)/html.Po + -rm -f ./$(DEPDIR)/regexp.Po + -rm -f ./$(DEPDIR)/schema.Po + -rm -f ./$(DEPDIR)/testFuzzer.Po + -rm -f ./$(DEPDIR)/uri.Po + -rm -f ./$(DEPDIR)/xml.Po + -rm -f ./$(DEPDIR)/xpath.Po + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: + +.MAKE: check-am install-am install-strip + +.PHONY: CTAGS GTAGS TAGS all all-am am--depfiles check check-am clean \ + clean-checkPROGRAMS clean-generic clean-libtool cscopelist-am \ + ctags ctags-am distclean distclean-compile distclean-generic \ + distclean-libtool distclean-tags distdir dvi dvi-am html \ + html-am info info-am install install-am install-data \ + install-data-am install-dvi install-dvi-am install-exec \ + install-exec-am install-html install-html-am install-info \ + install-info-am install-man install-pdf 
install-pdf-am \ + install-ps install-ps-am install-strip installcheck \ + installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + tags tags-am uninstall uninstall-am + +.PRECIOUS: Makefile + + +.PHONY: tests corpus clean-corpus + +corpus: seed/html.stamp seed/schema.stamp seed/xml.stamp seed/xpath.stamp + +tests: testFuzzer$(EXEEXT) corpus + @echo "## Running fuzzer tests" + @./testFuzzer$(EXEEXT) + +clean-corpus: + rm -rf seed/html.stamp seed/html + rm -rf seed/schema.stamp seed/schema + rm -rf seed/xml.stamp seed/xml + rm -rf seed/xpath.stamp seed/xpath + +# XML fuzzer + +seed/xml.stamp: genSeed$(EXEEXT) + @mkdir -p seed/xml + @./genSeed$(EXEEXT) xml $(XML_SEED_CORPUS_SRC) + @touch seed/xml.stamp + +fuzz-xml: xml$(EXEEXT) seed/xml.stamp + @mkdir -p corpus/xml + ./xml$(EXEEXT) \ + -dict=xml.dict \ + -max_len=$(XML_MAX_LEN) \ + -timeout=20 \ + corpus/xml seed/xml + +# HTML fuzzer + +seed/html.stamp: genSeed$(EXEEXT) + @mkdir -p seed/html + @./genSeed$(EXEEXT) html '$(top_srcdir)/test/HTML/*' + @touch seed/html.stamp + +fuzz-html: html$(EXEEXT) seed/html.stamp + @mkdir -p corpus/html + ./html$(EXEEXT) \ + -dict=html.dict \ + -max_len=1000000 \ + -timeout=10 \ + corpus/html seed/html + +fuzz-regexp: regexp$(EXEEXT) + @mkdir -p corpus/regexp + ./regexp$(EXEEXT) \ + -dict=regexp.dict \ + -max_len=200 \ + -timeout=5 \ + corpus/regexp $(srcdir)/seed/regexp + +fuzz-uri: uri$(EXEEXT) + @mkdir -p corpus/uri + ./uri$(EXEEXT) \ + -max_len=10000 \ + -timeout=2 \ + corpus/uri $(srcdir)/seed/uri + +# XML Schema fuzzer + +seed/schema.stamp: genSeed$(EXEEXT) + @mkdir -p seed/schema + @./genSeed$(EXEEXT) schema '$(top_srcdir)/test/schemas/*.xsd' + @touch seed/schema.stamp + +fuzz-schema: schema$(EXEEXT) seed/schema.stamp + @mkdir -p corpus/schema + ./schema$(EXEEXT) \ + -dict=schema.dict \ + -max_len=$(XML_MAX_LEN) \ + -timeout=20 \ + corpus/schema seed/schema + +# 
XPath fuzzer + +seed/xpath.stamp: genSeed$(EXEEXT) + @mkdir -p seed/xpath + @./genSeed$(EXEEXT) xpath "$(top_builddir)/test/XPath" + @touch seed/xpath.stamp + +fuzz-xpath: xpath$(EXEEXT) seed/xpath.stamp + @mkdir -p corpus/xpath + ./xpath$(EXEEXT) \ + -dict=xpath.dict \ + -max_len=10000 \ + -timeout=20 \ + corpus/xpath seed/xpath + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/third_party/libxml/src/fuzz/README b/third_party/libxml/src/fuzz/README new file mode 100644 index 000000000000..f675ad82557d --- /dev/null +++ b/third_party/libxml/src/fuzz/README @@ -0,0 +1,19 @@ +libFuzzer instructions for libxml2 +================================== + +Set compiler and options: + + export CC=clang + export CFLAGS="-g -fsanitize=fuzzer-no-link,address,undefined \ + -fno-sanitize-recover=all \ + -DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION" + +Build libxml2 with instrumentation: + + ./configure --without-python + make + +Run fuzzers: + + make -C fuzz fuzz-xml + diff --git a/third_party/libxml/src/fuzz/fuzz.c b/third_party/libxml/src/fuzz/fuzz.c new file mode 100644 index 000000000000..212136ac8983 --- /dev/null +++ b/third_party/libxml/src/fuzz/fuzz.c @@ -0,0 +1,355 @@ +/* + * fuzz.c: Common functions for fuzzing. + * + * See Copyright for the status of this software. + */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include "fuzz.h" + +typedef struct { + const char *data; + size_t size; +} xmlFuzzEntityInfo; + +/* Single static instance for now */ +static struct { + /* Original data */ + const char *data; + size_t size; + + /* Remaining data */ + const char *ptr; + size_t remaining; + + /* Buffer for unescaped strings */ + char *outBuf; + char *outPtr; /* Free space at end of buffer */ + + xmlHashTablePtr entities; /* Maps URLs to xmlFuzzEntityInfos */ + + /* The first entity is the main entity. 
*/ + const char *mainUrl; + xmlFuzzEntityInfo *mainEntity; +} fuzzData; + +/** + * xmlFuzzErrorFunc: + * + * An error function that simply discards all errors. + */ +void +xmlFuzzErrorFunc(void *ctx ATTRIBUTE_UNUSED, const char *msg ATTRIBUTE_UNUSED, + ...) { +} + +/** + * xmlFuzzDataInit: + * + * Initialize fuzz data provider. + */ +void +xmlFuzzDataInit(const char *data, size_t size) { + fuzzData.data = data; + fuzzData.size = size; + fuzzData.ptr = data; + fuzzData.remaining = size; + + fuzzData.outBuf = xmlMalloc(size + 1); + fuzzData.outPtr = fuzzData.outBuf; + + fuzzData.entities = xmlHashCreate(8); + fuzzData.mainUrl = NULL; + fuzzData.mainEntity = NULL; +} + +/** + * xmlFuzzDataFree: + * + * Cleanup fuzz data provider. + */ +void +xmlFuzzDataCleanup(void) { + xmlFree(fuzzData.outBuf); + xmlHashFree(fuzzData.entities, xmlHashDefaultDeallocator); +} + +/** + * xmlFuzzReadInt: + * @size: size of string in bytes + * + * Read an integer from the fuzz data. + */ +int +xmlFuzzReadInt() { + int ret; + + if (fuzzData.remaining < sizeof(int)) + return(0); + memcpy(&ret, fuzzData.ptr, sizeof(int)); + fuzzData.ptr += sizeof(int); + fuzzData.remaining -= sizeof(int); + + return ret; +} + +/** + * xmlFuzzReadRemaining: + * @size: size of string in bytes + * + * Read remaining bytes from fuzz data. + */ +const char * +xmlFuzzReadRemaining(size_t *size) { + const char *ret = fuzzData.ptr; + + *size = fuzzData.remaining; + fuzzData.ptr += fuzzData.remaining; + fuzzData.remaining = 0; + + return(ret); +} + +/* + * xmlFuzzWriteString: + * @out: output file + * @str: string to write + * + * Write a random-length string to file in a format similar to + * FuzzedDataProvider. Backslash followed by newline marks the end of the + * string. Two backslashes are used to escape a backslash. 
+ */ +void +xmlFuzzWriteString(FILE *out, const char *str) { + for (; *str; str++) { + int c = (unsigned char) *str; + putc(c, out); + if (c == '\\') + putc(c, out); + } + putc('\\', out); + putc('\n', out); +} + +/** + * xmlFuzzReadString: + * @size: size of string in bytes + * + * Read a random-length string from the fuzz data. + * + * The format is similar to libFuzzer's FuzzedDataProvider but treats + * backslash followed by newline as end of string. This makes the fuzz data + * more readable. A backslash character is escaped with another backslash. + * + * Returns a zero-terminated string or NULL if the fuzz data is exhausted. + */ +const char * +xmlFuzzReadString(size_t *size) { + const char *out = fuzzData.outPtr; + + while (fuzzData.remaining > 0) { + int c = *fuzzData.ptr++; + fuzzData.remaining--; + + if ((c == '\\') && (fuzzData.remaining > 0)) { + int c2 = *fuzzData.ptr; + + if (c2 == '\n') { + fuzzData.ptr++; + fuzzData.remaining--; + *size = fuzzData.outPtr - out; + *fuzzData.outPtr++ = '\0'; + return(out); + } + if (c2 == '\\') { + fuzzData.ptr++; + fuzzData.remaining--; + } + } + + *fuzzData.outPtr++ = c; + } + + if (fuzzData.outPtr > out) { + *size = fuzzData.outPtr - out; + *fuzzData.outPtr++ = '\0'; + return(out); + } + + return(NULL); +} + +/** + * xmlFuzzReadEntities: + * + * Read entities like the main XML file, external DTDs, external parsed + * entities from fuzz data. 
+ */ +void +xmlFuzzReadEntities(void) { + size_t num = 0; + + while (1) { + const char *url, *entity; + size_t urlSize, entitySize; + xmlFuzzEntityInfo *entityInfo; + + url = xmlFuzzReadString(&urlSize); + if (url == NULL) break; + + entity = xmlFuzzReadString(&entitySize); + if (entity == NULL) break; + + if (xmlHashLookup(fuzzData.entities, (xmlChar *)url) == NULL) { + entityInfo = xmlMalloc(sizeof(xmlFuzzEntityInfo)); + if (entityInfo == NULL) + break; + entityInfo->data = entity; + entityInfo->size = entitySize; + + xmlHashAddEntry(fuzzData.entities, (xmlChar *)url, entityInfo); + + if (num == 0) { + fuzzData.mainUrl = url; + fuzzData.mainEntity = entityInfo; + } + + num++; + } + } +} + +/** + * xmlFuzzMainUrl: + * + * Returns the main URL. + */ +const char * +xmlFuzzMainUrl(void) { + return(fuzzData.mainUrl); +} + +/** + * xmlFuzzMainEntity: + * @size: size of the main entity in bytes + * + * Returns the main entity. + */ +const char * +xmlFuzzMainEntity(size_t *size) { + if (fuzzData.mainEntity == NULL) + return(NULL); + *size = fuzzData.mainEntity->size; + return(fuzzData.mainEntity->data); +} + +/** + * xmlFuzzEntityLoader: + * + * The entity loader for fuzz data. + */ +xmlParserInputPtr +xmlFuzzEntityLoader(const char *URL, const char *ID ATTRIBUTE_UNUSED, + xmlParserCtxtPtr ctxt) { + xmlParserInputPtr input; + xmlFuzzEntityInfo *entity; + + if (URL == NULL) + return(NULL); + entity = xmlHashLookup(fuzzData.entities, (xmlChar *) URL); + if (entity == NULL) + return(NULL); + + input = xmlNewInputStream(ctxt); + input->filename = NULL; + input->buf = xmlParserInputBufferCreateMem(entity->data, entity->size, + XML_CHAR_ENCODING_NONE); + if (input->buf == NULL) { + xmlFreeInputStream(input); + return(NULL); + } + input->base = input->cur = xmlBufContent(input->buf->buffer); + input->end = input->base + entity->size; + + return input; +} + +/** + * xmlFuzzExtractStrings: + * + * Extract C strings from input data. 
Use exact-size allocations to detect + * potential memory errors. + */ +size_t +xmlFuzzExtractStrings(const char *data, size_t size, char **strings, + size_t numStrings) { + const char *start = data; + const char *end = data + size; + size_t i = 0, ret; + + while (i < numStrings) { + size_t strSize = end - start; + const char *zero = memchr(start, 0, strSize); + + if (zero != NULL) + strSize = zero - start; + + strings[i] = xmlMalloc(strSize + 1); + memcpy(strings[i], start, strSize); + strings[i][strSize] = '\0'; + + i++; + if (zero != NULL) + start = zero + 1; + else + break; + } + + ret = i; + + while (i < numStrings) { + strings[i] = NULL; + i++; + } + + return(ret); +} + +char * +xmlSlurpFile(const char *path, size_t *sizeRet) { + FILE *file; + struct stat statbuf; + char *data; + size_t size; + + if ((stat(path, &statbuf) != 0) || (!S_ISREG(statbuf.st_mode))) + return(NULL); + size = statbuf.st_size; + file = fopen(path, "rb"); + if (file == NULL) + return(NULL); + data = xmlMalloc(size + 1); + if (data != NULL) { + if (fread(data, 1, size, file) != size) { + xmlFree(data); + data = NULL; + } else { + data[size] = 0; + if (sizeRet != NULL) + *sizeRet = size; + } + } + fclose(file); + + return(data); +} + diff --git a/third_party/libxml/src/fuzz/fuzz.h b/third_party/libxml/src/fuzz/fuzz.h new file mode 100644 index 000000000000..a51b3987abcc --- /dev/null +++ b/third_party/libxml/src/fuzz/fuzz.h @@ -0,0 +1,91 @@ +/* + * fuzz.h: Common functions and macros for fuzzing. + * + * See Copyright for the status of this software. 
+ */ + +#ifndef __XML_FUZZERCOMMON_H__ +#define __XML_FUZZERCOMMON_H__ + +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#if defined(LIBXML_HTML_ENABLED) && defined(LIBXML_OUTPUT_ENABLED) + #define HAVE_HTML_FUZZER +#endif +#if defined(LIBXML_REGEXP_ENABLED) + #define HAVE_REGEXP_FUZZER +#endif +#if defined(LIBXML_SCHEMAS_ENABLED) + #define HAVE_SCHEMA_FUZZER +#endif +#if 1 + #define HAVE_URI_FUZZER +#endif +#if defined(LIBXML_OUTPUT_ENABLED) && \ + defined(LIBXML_READER_ENABLED) && \ + defined(LIBXML_XINCLUDE_ENABLED) + #define HAVE_XML_FUZZER +#endif +#if defined(LIBXML_XPATH_ENABLED) + #define HAVE_XPATH_FUZZER +#endif + +int +LLVMFuzzerInitialize(int *argc, char ***argv); + +int +LLVMFuzzerTestOneInput(const char *data, size_t size); + +void +xmlFuzzErrorFunc(void *ctx ATTRIBUTE_UNUSED, const char *msg ATTRIBUTE_UNUSED, + ...); + +void +xmlFuzzDataInit(const char *data, size_t size); + +void +xmlFuzzDataCleanup(void); + +int +xmlFuzzReadInt(void); + +const char * +xmlFuzzReadRemaining(size_t *size); + +void +xmlFuzzWriteString(FILE *out, const char *str); + +const char * +xmlFuzzReadString(size_t *size); + +void +xmlFuzzReadEntities(void); + +const char * +xmlFuzzMainUrl(void); + +const char * +xmlFuzzMainEntity(size_t *size); + +xmlParserInputPtr +xmlFuzzEntityLoader(const char *URL, const char *ID, xmlParserCtxtPtr ctxt); + +size_t +xmlFuzzExtractStrings(const char *data, size_t size, char **strings, + size_t numStrings); + +char * +xmlSlurpFile(const char *path, size_t *size); + +#ifdef __cplusplus +} +#endif + +#endif /* __XML_FUZZERCOMMON_H__ */ + diff --git a/third_party/libxml/src/fuzz/genSeed.c b/third_party/libxml/src/fuzz/genSeed.c new file mode 100644 index 000000000000..2f03802727ae --- /dev/null +++ b/third_party/libxml/src/fuzz/genSeed.c @@ -0,0 +1,427 @@ +/* + * xmlSeed.c: Generate the XML seed corpus for fuzzing. + * + * See Copyright for the status of this software. 
+ */ + +#include +#include +#include +#include +#include + +#ifdef _WIN32 +#include +#else +#include +#endif + +#include +#include +#include +#include +#include +#include "fuzz.h" + +#define PATH_SIZE 500 +#define SEED_BUF_SIZE 16384 +#define EXPR_SIZE 4500 + +typedef int +(*fileFunc)(const char *base, FILE *out); + +typedef int +(*mainFunc)(const char *arg); + +static struct { + FILE *out; + xmlHashTablePtr entities; /* Maps URLs to xmlFuzzEntityInfos */ + xmlExternalEntityLoader oldLoader; + fileFunc processFile; + const char *fuzzer; + int counter; + char cwd[PATH_SIZE]; +} globalData; + +/* + * A custom entity loader that writes all external DTDs or entities to a + * single file in the format expected by xmlFuzzEntityLoader. + */ +static xmlParserInputPtr +fuzzEntityRecorder(const char *URL, const char *ID, + xmlParserCtxtPtr ctxt) { + xmlParserInputPtr in; + static const int chunkSize = 16384; + int len; + + in = xmlNoNetExternalEntityLoader(URL, ID, ctxt); + if (in == NULL) + return(NULL); + + if (globalData.entities == NULL) { + globalData.entities = xmlHashCreate(4); + } else if (xmlHashLookup(globalData.entities, + (const xmlChar *) URL) != NULL) { + return(in); + } + + do { + len = xmlParserInputBufferGrow(in->buf, chunkSize); + if (len < 0) { + fprintf(stderr, "Error reading %s\n", URL); + xmlFreeInputStream(in); + return(NULL); + } + } while (len > 0); + + xmlFuzzWriteString(globalData.out, URL); + xmlFuzzWriteString(globalData.out, + (char *) xmlBufContent(in->buf->buffer)); + + xmlFreeInputStream(in); + + xmlHashAddEntry(globalData.entities, (const xmlChar *) URL, NULL); + + return(xmlNoNetExternalEntityLoader(URL, ID, ctxt)); +} + +static void +fuzzRecorderInit(FILE *out) { + globalData.out = out; + globalData.entities = xmlHashCreate(8); + globalData.oldLoader = xmlGetExternalEntityLoader(); + xmlSetExternalEntityLoader(fuzzEntityRecorder); +} + +static void +fuzzRecorderCleanup() { + xmlSetExternalEntityLoader(globalData.oldLoader); + 
xmlHashFree(globalData.entities, xmlHashDefaultDeallocator); + globalData.out = NULL; + globalData.entities = NULL; + globalData.oldLoader = NULL; +} + +#ifdef HAVE_XML_FUZZER +static int +processXml(const char *docFile, FILE *out) { + int opts = XML_PARSE_NOENT | XML_PARSE_DTDLOAD; + xmlDocPtr doc; + + fwrite(&opts, sizeof(opts), 1, out); + + fuzzRecorderInit(out); + + doc = xmlReadFile(docFile, NULL, opts); + xmlXIncludeProcessFlags(doc, opts); + xmlFreeDoc(doc); + + fuzzRecorderCleanup(); + + return(0); +} +#endif + +#ifdef HAVE_HTML_FUZZER +static int +processHtml(const char *docFile, FILE *out) { + char buf[SEED_BUF_SIZE]; + FILE *file; + size_t size; + int opts = 0; + + fwrite(&opts, sizeof(opts), 1, out); + + /* Copy file */ + file = fopen(docFile, "rb"); + if (file == NULL) { + fprintf(stderr, "couldn't open %s\n", docFile); + return(0); + } + do { + size = fread(buf, 1, SEED_BUF_SIZE, file); + if (size > 0) + fwrite(buf, 1, size, out); + } while (size == SEED_BUF_SIZE); + fclose(file); + + return(0); +} +#endif + +#ifdef HAVE_SCHEMA_FUZZER +static int +processSchema(const char *docFile, FILE *out) { + xmlSchemaPtr schema; + xmlSchemaParserCtxtPtr pctxt; + + fuzzRecorderInit(out); + + pctxt = xmlSchemaNewParserCtxt(docFile); + xmlSchemaSetParserErrors(pctxt, xmlFuzzErrorFunc, xmlFuzzErrorFunc, NULL); + schema = xmlSchemaParse(pctxt); + xmlSchemaFreeParserCtxt(pctxt); + xmlSchemaFree(schema); + + fuzzRecorderCleanup(); + + return(0); +} +#endif + +static int +processPattern(const char *pattern) { + glob_t globbuf; + int ret = 0; + int res, i; + + res = glob(pattern, 0, NULL, &globbuf); + if (res == GLOB_NOMATCH) + return(0); + if (res != 0) { + fprintf(stderr, "couldn't match pattern %s\n", pattern); + return(-1); + } + + for (i = 0; i < globbuf.gl_pathc; i++) { + struct stat statbuf; + char outPath[PATH_SIZE]; + char *dirBuf = NULL; + char *baseBuf = NULL; + const char *path, *dir, *base; + FILE *out = NULL; + int dirChanged = 0; + size_t size; + + path = 
globbuf.gl_pathv[i]; + + if ((stat(path, &statbuf) != 0) || (!S_ISREG(statbuf.st_mode))) + continue; + + dirBuf = (char *) xmlCharStrdup(path); + baseBuf = (char *) xmlCharStrdup(path); + if ((dirBuf == NULL) || (baseBuf == NULL)) { + fprintf(stderr, "memory allocation failed\n"); + ret = -1; + goto error; + } + dir = dirname(dirBuf); + base = basename(baseBuf); + + size = snprintf(outPath, sizeof(outPath), "seed/%s/%s", + globalData.fuzzer, base); + if (size >= PATH_SIZE) { + fprintf(stderr, "creating path failed\n"); + ret = -1; + goto error; + } + out = fopen(outPath, "wb"); + if (out == NULL) { + fprintf(stderr, "couldn't open %s for writing\n", outPath); + ret = -1; + goto error; + } + if (chdir(dir) != 0) { + fprintf(stderr, "couldn't chdir to %s\n", dir); + ret = -1; + goto error; + } + dirChanged = 1; + if (globalData.processFile(base, out) != 0) + ret = -1; + +error: + if (out != NULL) + fclose(out); + xmlFree(dirBuf); + xmlFree(baseBuf); + if ((dirChanged) && (chdir(globalData.cwd) != 0)) { + fprintf(stderr, "couldn't chdir to %s\n", globalData.cwd); + ret = -1; + break; + } + } + + globfree(&globbuf); + return(ret); +} + +#ifdef HAVE_XPATH_FUZZER +static int +processXPath(const char *testDir, const char *prefix, const char *name, + const char *data, const char *subdir, int xptr) { + char pattern[PATH_SIZE]; + glob_t globbuf; + size_t i, size; + int ret = 0, res; + + size = snprintf(pattern, sizeof(pattern), "%s/%s/%s*", + testDir, subdir, prefix); + if (size >= PATH_SIZE) + return(-1); + res = glob(pattern, 0, NULL, &globbuf); + if (res == GLOB_NOMATCH) + return(0); + if (res != 0) { + fprintf(stderr, "couldn't match pattern %s\n", pattern); + return(-1); + } + + for (i = 0; i < globbuf.gl_pathc; i++) { + char *path = globbuf.gl_pathv[i]; + struct stat statbuf; + FILE *in; + char expr[EXPR_SIZE]; + + if ((stat(path, &statbuf) != 0) || (!S_ISREG(statbuf.st_mode))) + continue; + + in = fopen(path, "rb"); + if (in == NULL) { + ret = -1; + continue; + } + + 
while (fgets(expr, EXPR_SIZE, in) > 0) { + char outPath[PATH_SIZE]; + FILE *out; + int j; + + for (j = 0; expr[j] != 0; j++) + if (expr[j] == '\r' || expr[j] == '\n') + break; + expr[j] = 0; + + size = snprintf(outPath, sizeof(outPath), "seed/xpath/%s-%d", + name, globalData.counter); + if (size >= PATH_SIZE) { + ret = -1; + continue; + } + out = fopen(outPath, "wb"); + if (out == NULL) { + ret = -1; + continue; + } + + if (xptr) { + xmlFuzzWriteString(out, expr); + } else { + char xptrExpr[EXPR_SIZE+100]; + + /* Wrap XPath expressions as XPointer */ + snprintf(xptrExpr, sizeof(xptrExpr), "xpointer(%s)", expr); + xmlFuzzWriteString(out, xptrExpr); + } + + xmlFuzzWriteString(out, data); + + fclose(out); + globalData.counter++; + } + + fclose(in); + } + + globfree(&globbuf); + + return(ret); +} + +int +processXPathDir(const char *testDir) { + char pattern[PATH_SIZE]; + glob_t globbuf; + size_t i, size; + int ret = 0; + + globalData.counter = 1; + if (processXPath(testDir, "", "expr", "", "expr", 0) != 0) + ret = -1; + + size = snprintf(pattern, sizeof(pattern), "%s/docs/*", testDir); + if (size >= PATH_SIZE) + return(1); + if (glob(pattern, 0, NULL, &globbuf) != 0) + return(1); + + for (i = 0; i < globbuf.gl_pathc; i++) { + char *path = globbuf.gl_pathv[i]; + char *data; + const char *docFile; + + data = xmlSlurpFile(path, NULL); + if (data == NULL) { + ret = -1; + continue; + } + docFile = basename(path); + + globalData.counter = 1; + if (processXPath(testDir, docFile, docFile, data, "tests", 0) != 0) + ret = -1; + if (processXPath(testDir, docFile, docFile, data, "xptr", 1) != 0) + ret = -1; + + xmlFree(data); + } + + globfree(&globbuf); + + return(ret); +} +#endif + +int +main(int argc, const char **argv) { + mainFunc processArg = NULL; + const char *fuzzer; + int ret = 0; + int xpath = 0; + int i; + + if (argc < 3) { + fprintf(stderr, "usage: seed [FUZZER] [PATTERN...]\n"); + return(1); + } + + xmlSetGenericErrorFunc(NULL, xmlFuzzErrorFunc); + + fuzzer = argv[1]; 
+ if (strcmp(fuzzer, "html") == 0) { +#ifdef HAVE_HTML_FUZZER + processArg = processPattern; + globalData.processFile = processHtml; +#endif + } else if (strcmp(fuzzer, "schema") == 0) { +#ifdef HAVE_SCHEMA_FUZZER + processArg = processPattern; + globalData.processFile = processSchema; +#endif + } else if (strcmp(fuzzer, "xml") == 0) { +#ifdef HAVE_XML_FUZZER + processArg = processPattern; + globalData.processFile = processXml; +#endif + } else if (strcmp(fuzzer, "xpath") == 0) { +#ifdef HAVE_XPATH_FUZZER + processArg = processXPathDir; +#endif + } else { + fprintf(stderr, "unknown fuzzer %s\n", fuzzer); + return(1); + } + globalData.fuzzer = fuzzer; + + if (getcwd(globalData.cwd, PATH_SIZE) == NULL) { + fprintf(stderr, "couldn't get current directory\n"); + return(1); + } + + if (processArg != NULL) + for (i = 2; i < argc; i++) + processArg(argv[i]); + + return(ret); +} + diff --git a/third_party/libxml/src/fuzz/html.c b/third_party/libxml/src/fuzz/html.c new file mode 100644 index 000000000000..116b3df3299c --- /dev/null +++ b/third_party/libxml/src/fuzz/html.c @@ -0,0 +1,82 @@ +/* + * html.c: a libFuzzer target to test several HTML parser interfaces. + * + * See Copyright for the status of this software. 
+ */ + +#include +#include +#include +#include "fuzz.h" + +int +LLVMFuzzerInitialize(int *argc ATTRIBUTE_UNUSED, + char ***argv ATTRIBUTE_UNUSED) { + xmlInitParser(); +#ifdef LIBXML_CATALOG_ENABLED + xmlInitializeCatalog(); +#endif + xmlSetGenericErrorFunc(NULL, xmlFuzzErrorFunc); + + return 0; +} + +int +LLVMFuzzerTestOneInput(const char *data, size_t size) { + static const size_t maxChunkSize = 128; + htmlDocPtr doc; + htmlParserCtxtPtr ctxt; + xmlOutputBufferPtr out; + const char *docBuffer; + size_t docSize, consumed, chunkSize; + int opts, outSize; + + xmlFuzzDataInit(data, size); + opts = xmlFuzzReadInt(); + + docBuffer = xmlFuzzReadRemaining(&docSize); + if (docBuffer == NULL) { + xmlFuzzDataCleanup(); + return(0); + } + + /* Pull parser */ + + doc = htmlReadMemory(docBuffer, docSize, NULL, NULL, opts); + + /* + * Also test the serializer. Call htmlDocContentDumpOutput with our + * own buffer to avoid encoding the output. The HTML encoding is + * excruciatingly slow (see htmlEntityValueLookup). + */ + out = xmlAllocOutputBuffer(NULL); + htmlDocContentDumpOutput(out, doc, NULL); + xmlOutputBufferClose(out); + + xmlFreeDoc(doc); + + /* Push parser */ + + ctxt = htmlCreatePushParserCtxt(NULL, NULL, NULL, 0, NULL, + XML_CHAR_ENCODING_NONE); + htmlCtxtUseOptions(ctxt, opts); + + for (consumed = 0; consumed < docSize; consumed += chunkSize) { + chunkSize = docSize - consumed; + if (chunkSize > maxChunkSize) + chunkSize = maxChunkSize; + htmlParseChunk(ctxt, docBuffer + consumed, chunkSize, 0); + } + + htmlParseChunk(ctxt, NULL, 0, 1); + xmlFreeDoc(ctxt->myDoc); + htmlFreeParserCtxt(ctxt); + + /* Cleanup */ + + xmlFuzzDataCleanup(); + xmlResetLastError(); + + return(0); +} + diff --git a/third_party/libxml/src/fuzz/html.dict b/third_party/libxml/src/fuzz/html.dict new file mode 100644 index 000000000000..80444c263c47 --- /dev/null +++ b/third_party/libxml/src/fuzz/html.dict @@ -0,0 +1,124 @@ +elem_a="" +elem_abbr="" +elem_acronym="" +elem_address="
" +elem_applet="" +elem_area="" +elem_b="" +elem_base="" +elem_basefont="" +elem_bdo="" +elem_big="" +elem_blockquote="
" +elem_body="" +elem_br="
" +elem_button="" +elem_caption="" +elem_center="
" +elem_cite="" +elem_code="" +elem_col="" +elem_colgroup="" +elem_dd="
" +elem_del="" +elem_dfn="" +elem_dir="" +elem_div="
" +elem_dl="
" +elem_dt="
" +elem_em="" +elem_embed="" +elem_fieldset="
" +elem_font="" +elem_form="
" +elem_frame="" +elem_frameset="" +elem_h1="

" +elem_h2="

" +elem_h3="

" +elem_h4="

" +elem_h5="
" +elem_h6="
" +elem_head="" +elem_hr="
" +elem_html="" +elem_i="" +elem_iframe="" +elem_img="" +elem_input="" +elem_ins="" +elem_isindex="" +elem_kbd="" +elem_label="" +elem_legend="" +elem_li="
  • " +elem_link="" +elem_map="" +elem_menu="" +elem_meta="" +elem_noframes="" +elem_noscript="" +elem_object="" +elem_ol="
      " +elem_optgroup="" +elem_option="" +elem_p="

      " +elem_param="" +elem_pre="
      "
      +elem_q=""
      +elem_s=""
      +elem_samp=""
      +elem_script=""
      +elem_select=""
      +elem_small=""
      +elem_span=""
      +elem_strike=""
      +elem_strong=""
      +elem_style=""
      +elem_sub=""
      +elem_sup=""
      +elem_table="
      " +elem_tbody="" +elem_td="" +elem_textarea="" +elem_tfoot="" +elem_th="" +elem_thead="" +elem_title="" +elem_tr="" +elem_tt="" +elem_u="" +elem_ul="
        " +elem_var="" + +attr_id=" id=\"\"" +attr_style=" style=\"\"" + +comment="" + +doctype="" +doctype_system="" +doctype_public="" + +pi="" + +ref_lt="<" +ref_gt=">" +ref_amp="&" +ref_apos="'" +ref_quot=""" +ref_dec=" " +ref_hex=" " + +cs_utf8="UTF-8" +cs_utf16="UTF-16" +cs_utf16le="UTF-16LE" +cs_utf16be="UTF-16BE" +cs_ucs2="UCS-2" +cs_ucs4="UCS-4" +cs_latin1="ISO-8859-1" +cs_ascii="ASCII" +cs_ebcdic="EBCDIC" +cs_iso2022jp="ISO-2022-JP" +cs_shift_jis="SHIFT_JIS" +cs_euc_jp="EUC-JP" diff --git a/third_party/libxml/src/fuzz/regexp.c b/third_party/libxml/src/fuzz/regexp.c new file mode 100644 index 000000000000..af1210aa17b6 --- /dev/null +++ b/third_party/libxml/src/fuzz/regexp.c @@ -0,0 +1,47 @@ +/* + * regexp.c: a libFuzzer target to test the regexp module. + * + * See Copyright for the status of this software. + */ + +#include +#include "fuzz.h" + +int +LLVMFuzzerInitialize(int *argc ATTRIBUTE_UNUSED, + char ***argv ATTRIBUTE_UNUSED) { + xmlSetGenericErrorFunc(NULL, xmlFuzzErrorFunc); + + return 0; +} + +int +LLVMFuzzerTestOneInput(const char *data, size_t size) { + xmlRegexpPtr regexp; + char *str[2] = { NULL, NULL }; + size_t numStrings; + + if (size > 200) + return(0); + + numStrings = xmlFuzzExtractStrings(data, size, str, 2); + + /* CUR_SCHAR doesn't handle invalid UTF-8 and may cause infinite loops. */ + if (xmlCheckUTF8(BAD_CAST str[0]) != 0) { + regexp = xmlRegexpCompile(BAD_CAST str[0]); + /* xmlRegexpExec has pathological performance in too many cases. */ +#if 0 + if ((regexp != NULL) && (numStrings >= 2)) { + xmlRegexpExec(regexp, BAD_CAST str[1]); + } +#endif + xmlRegFreeRegexp(regexp); + } + + xmlFree(str[0]); + xmlFree(str[1]); + xmlResetLastError(); + + return 0; +} + diff --git a/third_party/libxml/src/fuzz/regexp.dict b/third_party/libxml/src/fuzz/regexp.dict new file mode 100644 index 000000000000..30d666dcf3eb --- /dev/null +++ b/third_party/libxml/src/fuzz/regexp.dict @@ -0,0 +1,155 @@ +quant_any="*" +quant_opt="?" 
+quant_some="+" +quant_num="{1,2}" + +dot="." +branch="|a" +parens="()" +parens_inner=")(" +pos_group="[a]" +neg_group="[^a]" +class_subtraction="[a-[b]]" + +esc_space="\\s" +esc_initial="\\i" +esc_name="\\c" +esc_digit="\\d" +esc_word="\\w" + +cat_letter="\\p{L}" +cat_mark="\\p{M}" +cat_number="\\p{N}" +cat_punct="\\p{P}" +cat_sym="\\p{S}" +cat_sep="\\p{Z}" +cat_other="\\p{C}" + +block_aegean_numbers="\\p{IsAegeanNumbers}" +block_alphabetic_presentation_forms="\\p{IsAlphabeticPresentationForms}" +block_arabic="\\p{IsArabic}" +block_arabic_presentation_forms_a="\\p{IsArabicPresentationFormsA}" +block_arabic_presentation_forms_b="\\p{IsArabicPresentationFormsB}" +block_armenian="\\p{IsArmenian}" +block_arrows="\\p{IsArrows}" +block_basic_latin="\\p{IsBasicLatin}" +block_bengali="\\p{IsBengali}" +block_block_elements="\\p{IsBlockElements}" +block_bopomofo="\\p{IsBopomofo}" +block_bopomofo_extended="\\p{IsBopomofoExtended}" +block_box_drawing="\\p{IsBoxDrawing}" +block_braille_patterns="\\p{IsBraillePatterns}" +block_buhid="\\p{IsBuhid}" +block_byzantine_musical_symbols="\\p{IsByzantineMusicalSymbols}" +block_c_j_k_compatibility="\\p{IsCJKCompatibility}" +block_c_j_k_compatibility_forms="\\p{IsCJKCompatibilityForms}" +block_c_j_k_compatibility_ideographs="\\p{IsCJKCompatibilityIdeographs}" +block_c_j_k_compatibility_ideographs_supplement="\\p{IsCJKCompatibilityIdeographsSupplement}" +block_c_j_k_radicals_supplement="\\p{IsCJKRadicalsSupplement}" +block_c_j_k_symbolsand_punctuation="\\p{IsCJKSymbolsandPunctuation}" +block_c_j_k_unified_ideographs="\\p{IsCJKUnifiedIdeographs}" +block_c_j_k_unified_ideographs_extension_a="\\p{IsCJKUnifiedIdeographsExtensionA}" +block_cjk_unified_ideographs_extension_b="\\p{IsCJKUnifiedIdeographsExtensionB}" +block_cherokee="\\p{IsCherokee}" +block_combining_diacritical_marks="\\p{IsCombiningDiacriticalMarks}" +block_combining_diacritical_marksfor_symbols="\\p{IsCombiningDiacriticalMarksforSymbols}" 
+block_combining_half_marks="\\p{IsCombiningHalfMarks}" +block_combining_marksfor_symbols="\\p{IsCombiningMarksforSymbols}" +block_control_pictures="\\p{IsControlPictures}" +block_currency_symbols="\\p{IsCurrencySymbols}" +block_cypriot_syllabary="\\p{IsCypriotSyllabary}" +block_cyrillic="\\p{IsCyrillic}" +block_cyrillic_supplement="\\p{IsCyrillicSupplement}" +block_deseret="\\p{IsDeseret}" +block_devanagari="\\p{IsDevanagari}" +block_dingbats="\\p{IsDingbats}" +block_enclosed_alphanumerics="\\p{IsEnclosedAlphanumerics}" +block_enclosed_cjk_lettersand_months="\\p{IsEnclosedCJKLettersandMonths}" +block_ethiopic="\\p{IsEthiopic}" +block_general_punctuation="\\p{IsGeneralPunctuation}" +block_geometric_shapes="\\p{IsGeometricShapes}" +block_georgian="\\p{IsGeorgian}" +block_gothic="\\p{IsGothic}" +block_greek="\\p{IsGreek}" +block_greek_extended="\\p{IsGreekExtended}" +block_greekand_coptic="\\p{IsGreekandCoptic}" +block_gujarati="\\p{IsGujarati}" +block_gurmukhi="\\p{IsGurmukhi}" +block_halfwidthand_fullwidth_forms="\\p{IsHalfwidthandFullwidthForms}" +block_hangul_compatibility_jamo="\\p{IsHangulCompatibilityJamo}" +block_hangul_jamo="\\p{IsHangulJamo}" +block_hangul_syllables="\\p{IsHangulSyllables}" +block_hanunoo="\\p{IsHanunoo}" +block_hebrew="\\p{IsHebrew}" +block_high_private_use_surrogates="\\p{IsHighPrivateUseSurrogates}" +block_high_surrogates="\\p{IsHighSurrogates}" +block_hiragana="\\p{IsHiragana}" +block_ipa_extensions="\\p{IsIPAExtensions}" +block_ideographic_description_characters="\\p{IsIdeographicDescriptionCharacters}" +block_kanbun="\\p{IsKanbun}" +block_kangxi_radicals="\\p{IsKangxiRadicals}" +block_kannada="\\p{IsKannada}" +block_katakana="\\p{IsKatakana}" +block_katakana_phonetic_extensions="\\p{IsKatakanaPhoneticExtensions}" +block_khmer="\\p{IsKhmer}" +block_khmer_symbols="\\p{IsKhmerSymbols}" +block_lao="\\p{IsLao}" +block_latin1Supplement="\\p{IsLatin1Supplement}" +block_latin_extended_a="\\p{IsLatinExtendedA}" 
+block_latin_extended_b="\\p{IsLatinExtendedB}" +block_latin_extended_additional="\\p{IsLatinExtendedAdditional}" +block_letterlike_symbols="\\p{IsLetterlikeSymbols}" +block_limbu="\\p{IsLimbu}" +block_linear_b_ideograms="\\p{IsLinearBIdeograms}" +block_linear_b_syllabary="\\p{IsLinearBSyllabary}" +block_low_surrogates="\\p{IsLowSurrogates}" +block_malayalam="\\p{IsMalayalam}" +block_mathematical_alphanumeric_symbols="\\p{IsMathematicalAlphanumericSymbols}" +block_mathematical_operators="\\p{IsMathematicalOperators}" +block_miscellaneous_mathematical_symbols_a="\\p{IsMiscellaneousMathematicalSymbolsA}" +block_miscellaneous_mathematical_symbols_b="\\p{IsMiscellaneousMathematicalSymbolsB}" +block_miscellaneous_symbols="\\p{IsMiscellaneousSymbols}" +block_miscellaneous_symbolsand_arrows="\\p{IsMiscellaneousSymbolsandArrows}" +block_miscellaneous_technical="\\p{IsMiscellaneousTechnical}" +block_mongolian="\\p{IsMongolian}" +block_musical_symbols="\\p{IsMusicalSymbols}" +block_myanmar="\\p{IsMyanmar}" +block_number_forms="\\p{IsNumberForms}" +block_ogham="\\p{IsOgham}" +block_old_italic="\\p{IsOldItalic}" +block_optical_character_recognition="\\p{IsOpticalCharacterRecognition}" +block_oriya="\\p{IsOriya}" +block_osmanya="\\p{IsOsmanya}" +block_phonetic_extensions="\\p{IsPhoneticExtensions}" +block_private_use="\\p{IsPrivateUse}" +block_private_use_area="\\p{IsPrivateUseArea}" +block_runic="\\p{IsRunic}" +block_shavian="\\p{IsShavian}" +block_sinhala="\\p{IsSinhala}" +block_small_form_variants="\\p{IsSmallFormVariants}" +block_spacing_modifier_letters="\\p{IsSpacingModifierLetters}" +block_specials="\\p{IsSpecials}" +block_superscriptsand_subscripts="\\p{IsSuperscriptsandSubscripts}" +block_supplemental_arrows_a="\\p{IsSupplementalArrowsA}" +block_supplemental_arrows_b="\\p{IsSupplementalArrowsB}" +block_supplemental_mathematical_operators="\\p{IsSupplementalMathematicalOperators}" +block_supplementary_private_use_area_a="\\p{IsSupplementaryPrivateUseAreaA}" 
+block_supplementary_private_use_area_b="\\p{IsSupplementaryPrivateUseAreaB}" +block_syriac="\\p{IsSyriac}" +block_tagalog="\\p{IsTagalog}" +block_tagbanwa="\\p{IsTagbanwa}" +block_tags="\\p{IsTags}" +block_tai_le="\\p{IsTaiLe}" +block_tai_xuan_jing_symbols="\\p{IsTaiXuanJingSymbols}" +block_tamil="\\p{IsTamil}" +block_telugu="\\p{IsTelugu}" +block_thaana="\\p{IsThaana}" +block_thai="\\p{IsThai}" +block_tibetan="\\p{IsTibetan}" +block_ugaritic="\\p{IsUgaritic}" +block_unified_canadian_aboriginal_syllabics="\\p{IsUnifiedCanadianAboriginalSyllabics}" +block_variation_selectors="\\p{IsVariationSelectors}" +block_variation_selectors_supplement="\\p{IsVariationSelectorsSupplement}" +block_yi_radicals="\\p{IsYiRadicals}" +block_yi_syllables="\\p{IsYiSyllables}" +block_yijing_hexagram_symbols="\\p{IsYijingHexagramSymbols}" diff --git a/third_party/libxml/src/fuzz/schema.c b/third_party/libxml/src/fuzz/schema.c new file mode 100644 index 000000000000..689bffe3df1e --- /dev/null +++ b/third_party/libxml/src/fuzz/schema.c @@ -0,0 +1,44 @@ +/* + * schema.c: a libFuzzer target to test the XML Schema processor. + * + * See Copyright for the status of this software. 
+ */ + +#include +#include +#include "fuzz.h" + +int +LLVMFuzzerInitialize(int *argc ATTRIBUTE_UNUSED, + char ***argv ATTRIBUTE_UNUSED) { + xmlInitParser(); +#ifdef LIBXML_CATALOG_ENABLED + xmlInitializeCatalog(); +#endif + xmlSetGenericErrorFunc(NULL, xmlFuzzErrorFunc); + xmlSetExternalEntityLoader(xmlFuzzEntityLoader); + + return 0; +} + +int +LLVMFuzzerTestOneInput(const char *data, size_t size) { + xmlSchemaParserCtxtPtr pctxt; + + if (size > 50000) + return(0); + + xmlFuzzDataInit(data, size); + xmlFuzzReadEntities(); + + pctxt = xmlSchemaNewParserCtxt(xmlFuzzMainUrl()); + xmlSchemaSetParserErrors(pctxt, xmlFuzzErrorFunc, xmlFuzzErrorFunc, NULL); + xmlSchemaFree(xmlSchemaParse(pctxt)); + xmlSchemaFreeParserCtxt(pctxt); + + xmlFuzzDataCleanup(); + xmlResetLastError(); + + return(0); +} + diff --git a/third_party/libxml/src/fuzz/schema.dict b/third_party/libxml/src/fuzz/schema.dict new file mode 100644 index 000000000000..9a8fd386e554 --- /dev/null +++ b/third_party/libxml/src/fuzz/schema.dict @@ -0,0 +1,55 @@ +# TODO: Add more language elements + +xs_annotation="" + +xs_attribute="" +xs_attribute_required="" +xs_element="" + +# Primitive datatypes +type_string=" type='xs:string'" +type_boolean=" type='xs:boolean'" +type_decimal=" type='xs:decimal'" +type_float=" type='xs:float'" +type_double=" type='xs:double'" +type_date_time=" type='xs:dateTime'" +type_time=" type='xs:time'" +type_date=" type='xs:date'" +type_g_year_month=" type='xs:gYearMonth'" +type_g_year=" type='xs:gYear'" +type_g_month_day=" type='xs:gMonthDay'" +type_g_day=" type='xs:gDay'" +type_g_month=" type='xs:gMonth'" +type_hex_binary=" type='xs:hexBinary'" +type_base64_binary=" type='xs:base64Binary'" +type_any_uri=" type='xs:anyURI'" +type_qname=" type='xs:QName'" +type_notation=" type='xs:NOTATION'" + +# Occurs +occurs_min=" minOccurs='1'" +occurs_max=" maxOccurs='9'" +occurs_max_unbounded=" maxOccurs='unbounded'" + +# Simple type +xs_restriction_integer="" +xs_restriction_string="" +xs_list="" 
+xs_union="" + +# Restrictions +xs_min_exclusive="" +xs_min_inclusive="" +xs_max_exclusive="" +xs_max_inclusive="" +xs_total_digits="" +xs_fraction_digits="" +xs_length="" +xs_min_length="" +xs_max_length="" +xs_enumeration="" +xs_white_space_collapse="" +xs_white_space_preserve="" +xs_white_space_replace="" +xs_pattern="" + diff --git a/third_party/libxml/src/fuzz/seed/regexp/branch-1 b/third_party/libxml/src/fuzz/seed/regexp/branch-1 new file mode 100644 index 000000000000..ded775eaf70f Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/branch-1 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/branch-10 b/third_party/libxml/src/fuzz/seed/regexp/branch-10 new file mode 100644 index 000000000000..6700d77519c0 Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/branch-10 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/branch-11 b/third_party/libxml/src/fuzz/seed/regexp/branch-11 new file mode 100644 index 000000000000..d83f91819c60 Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/branch-11 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/branch-12 b/third_party/libxml/src/fuzz/seed/regexp/branch-12 new file mode 100644 index 000000000000..b44dba573226 Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/branch-12 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/branch-13 b/third_party/libxml/src/fuzz/seed/regexp/branch-13 new file mode 100644 index 000000000000..64e50a000089 Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/branch-13 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/branch-2 b/third_party/libxml/src/fuzz/seed/regexp/branch-2 new file mode 100644 index 000000000000..8293d81dbfba Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/branch-2 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/branch-3 b/third_party/libxml/src/fuzz/seed/regexp/branch-3 new file mode 100644 index 
000000000000..696af9bed4d7 Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/branch-3 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/branch-4 b/third_party/libxml/src/fuzz/seed/regexp/branch-4 new file mode 100644 index 000000000000..83179988e4d3 Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/branch-4 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/branch-5 b/third_party/libxml/src/fuzz/seed/regexp/branch-5 new file mode 100644 index 000000000000..6b6db8b981e9 Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/branch-5 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/branch-6 b/third_party/libxml/src/fuzz/seed/regexp/branch-6 new file mode 100644 index 000000000000..4f477902eec6 Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/branch-6 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/branch-7 b/third_party/libxml/src/fuzz/seed/regexp/branch-7 new file mode 100644 index 000000000000..6334f7254a13 Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/branch-7 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/branch-8 b/third_party/libxml/src/fuzz/seed/regexp/branch-8 new file mode 100644 index 000000000000..f77a8f4d8dde Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/branch-8 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/branch-9 b/third_party/libxml/src/fuzz/seed/regexp/branch-9 new file mode 100644 index 000000000000..acd0eeca2d9f Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/branch-9 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/bug316338-1 b/third_party/libxml/src/fuzz/seed/regexp/bug316338-1 new file mode 100644 index 000000000000..9f0a504abee1 Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/bug316338-1 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/bug316338-10 
b/third_party/libxml/src/fuzz/seed/regexp/bug316338-10 new file mode 100644 index 000000000000..60685bbe5a4f Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/bug316338-10 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/bug316338-11 b/third_party/libxml/src/fuzz/seed/regexp/bug316338-11 new file mode 100644 index 000000000000..72a7956cbafe Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/bug316338-11 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/bug316338-12 b/third_party/libxml/src/fuzz/seed/regexp/bug316338-12 new file mode 100644 index 000000000000..85416ee2cdd7 Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/bug316338-12 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/bug316338-13 b/third_party/libxml/src/fuzz/seed/regexp/bug316338-13 new file mode 100644 index 000000000000..c91d4fea9140 Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/bug316338-13 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/bug316338-14 b/third_party/libxml/src/fuzz/seed/regexp/bug316338-14 new file mode 100644 index 000000000000..a164b4236842 Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/bug316338-14 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/bug316338-15 b/third_party/libxml/src/fuzz/seed/regexp/bug316338-15 new file mode 100644 index 000000000000..750c76dedc5e Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/bug316338-15 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/bug316338-16 b/third_party/libxml/src/fuzz/seed/regexp/bug316338-16 new file mode 100644 index 000000000000..23c5d230741b Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/bug316338-16 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/bug316338-2 b/third_party/libxml/src/fuzz/seed/regexp/bug316338-2 new file mode 100644 index 000000000000..5468d06dc298 Binary files /dev/null and 
b/third_party/libxml/src/fuzz/seed/regexp/bug316338-2 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/bug316338-3 b/third_party/libxml/src/fuzz/seed/regexp/bug316338-3 new file mode 100644 index 000000000000..76e1e0bc3713 Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/bug316338-3 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/bug316338-4 b/third_party/libxml/src/fuzz/seed/regexp/bug316338-4 new file mode 100644 index 000000000000..e0f65a422d5e Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/bug316338-4 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/bug316338-5 b/third_party/libxml/src/fuzz/seed/regexp/bug316338-5 new file mode 100644 index 000000000000..fcfaa97a11ff Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/bug316338-5 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/bug316338-6 b/third_party/libxml/src/fuzz/seed/regexp/bug316338-6 new file mode 100644 index 000000000000..ce00a15bb2d8 Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/bug316338-6 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/bug316338-7 b/third_party/libxml/src/fuzz/seed/regexp/bug316338-7 new file mode 100644 index 000000000000..127fe1f64233 Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/bug316338-7 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/bug316338-8 b/third_party/libxml/src/fuzz/seed/regexp/bug316338-8 new file mode 100644 index 000000000000..fe8bb8b2d1d3 Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/bug316338-8 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/bug316338-9 b/third_party/libxml/src/fuzz/seed/regexp/bug316338-9 new file mode 100644 index 000000000000..3d56e5d7839f Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/bug316338-9 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/bug420596-1 
b/third_party/libxml/src/fuzz/seed/regexp/bug420596-1 new file mode 100644 index 000000000000..4426933e7e65 Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/bug420596-1 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/bug420596-2 b/third_party/libxml/src/fuzz/seed/regexp/bug420596-2 new file mode 100644 index 000000000000..474d2b6e4315 Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/bug420596-2 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/bug420596-3 b/third_party/libxml/src/fuzz/seed/regexp/bug420596-3 new file mode 100644 index 000000000000..09c75cb627e4 Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/bug420596-3 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/bug420596-4 b/third_party/libxml/src/fuzz/seed/regexp/bug420596-4 new file mode 100644 index 000000000000..65d561eaebaf Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/bug420596-4 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/bug420596-5 b/third_party/libxml/src/fuzz/seed/regexp/bug420596-5 new file mode 100644 index 000000000000..b6785803fe26 Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/bug420596-5 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/bug420596-6 b/third_party/libxml/src/fuzz/seed/regexp/bug420596-6 new file mode 100644 index 000000000000..3a05d82ffa83 Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/bug420596-6 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/bug420596-7 b/third_party/libxml/src/fuzz/seed/regexp/bug420596-7 new file mode 100644 index 000000000000..88e166051902 Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/bug420596-7 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/bug420596-8 b/third_party/libxml/src/fuzz/seed/regexp/bug420596-8 new file mode 100644 index 000000000000..4575a9253f6d Binary files /dev/null and 
b/third_party/libxml/src/fuzz/seed/regexp/bug420596-8 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/content-1 b/third_party/libxml/src/fuzz/seed/regexp/content-1 new file mode 100644 index 000000000000..5acbf8643454 Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/content-1 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/content-10 b/third_party/libxml/src/fuzz/seed/regexp/content-10 new file mode 100644 index 000000000000..f131454caba6 Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/content-10 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/content-2 b/third_party/libxml/src/fuzz/seed/regexp/content-2 new file mode 100644 index 000000000000..4e6b663ea0ce Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/content-2 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/content-3 b/third_party/libxml/src/fuzz/seed/regexp/content-3 new file mode 100644 index 000000000000..b13fc8dbeaa9 Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/content-3 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/content-4 b/third_party/libxml/src/fuzz/seed/regexp/content-4 new file mode 100644 index 000000000000..47c5d6de6ce9 Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/content-4 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/content-5 b/third_party/libxml/src/fuzz/seed/regexp/content-5 new file mode 100644 index 000000000000..f93860ebccef Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/content-5 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/content-6 b/third_party/libxml/src/fuzz/seed/regexp/content-6 new file mode 100644 index 000000000000..e5c6e14b5721 Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/content-6 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/content-7 b/third_party/libxml/src/fuzz/seed/regexp/content-7 new file mode 100644 index 
000000000000..4868dd2fd280 Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/content-7 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/content-8 b/third_party/libxml/src/fuzz/seed/regexp/content-8 new file mode 100644 index 000000000000..a3a87d0bdd0d Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/content-8 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/content-9 b/third_party/libxml/src/fuzz/seed/regexp/content-9 new file mode 100644 index 000000000000..91f0d9e99098 Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/content-9 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/hard-1 b/third_party/libxml/src/fuzz/seed/regexp/hard-1 new file mode 100644 index 000000000000..ba00382ec76b Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/hard-1 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/hard-10 b/third_party/libxml/src/fuzz/seed/regexp/hard-10 new file mode 100644 index 000000000000..7db28fa5fee4 Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/hard-10 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/hard-2 b/third_party/libxml/src/fuzz/seed/regexp/hard-2 new file mode 100644 index 000000000000..ed38b91bc8e1 Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/hard-2 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/hard-3 b/third_party/libxml/src/fuzz/seed/regexp/hard-3 new file mode 100644 index 000000000000..7b16da0ca177 Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/hard-3 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/hard-4 b/third_party/libxml/src/fuzz/seed/regexp/hard-4 new file mode 100644 index 000000000000..2ece886a771b Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/hard-4 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/hard-5 b/third_party/libxml/src/fuzz/seed/regexp/hard-5 new file mode 100644 index 
000000000000..870a3ec533fb Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/hard-5 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/hard-6 b/third_party/libxml/src/fuzz/seed/regexp/hard-6 new file mode 100644 index 000000000000..06aa7d0d15a1 Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/hard-6 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/hard-7 b/third_party/libxml/src/fuzz/seed/regexp/hard-7 new file mode 100644 index 000000000000..50a9ec39520f Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/hard-7 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/hard-8 b/third_party/libxml/src/fuzz/seed/regexp/hard-8 new file mode 100644 index 000000000000..0991129f60f5 Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/hard-8 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/hard-9 b/third_party/libxml/src/fuzz/seed/regexp/hard-9 new file mode 100644 index 000000000000..5bd1d89038a0 Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/hard-9 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/ncname-1 b/third_party/libxml/src/fuzz/seed/regexp/ncname-1 new file mode 100644 index 000000000000..608eb9a94bb3 Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/ncname-1 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/ncname-2 b/third_party/libxml/src/fuzz/seed/regexp/ncname-2 new file mode 100644 index 000000000000..cfb9b960c6c5 Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/ncname-2 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/ncname-3 b/third_party/libxml/src/fuzz/seed/regexp/ncname-3 new file mode 100644 index 000000000000..07a6a0818341 Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/ncname-3 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/ncname-4 b/third_party/libxml/src/fuzz/seed/regexp/ncname-4 new file mode 100644 index 
000000000000..87e937f409e4 Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/ncname-4 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/ncname-5 b/third_party/libxml/src/fuzz/seed/regexp/ncname-5 new file mode 100644 index 000000000000..ad2945609042 Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/ncname-5 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/ranges-1 b/third_party/libxml/src/fuzz/seed/regexp/ranges-1 new file mode 100644 index 000000000000..71448f239691 Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/ranges-1 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/ranges-10 b/third_party/libxml/src/fuzz/seed/regexp/ranges-10 new file mode 100644 index 000000000000..91aed3cf5a58 Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/ranges-10 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/ranges-11 b/third_party/libxml/src/fuzz/seed/regexp/ranges-11 new file mode 100644 index 000000000000..76eb5deb2356 Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/ranges-11 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/ranges-12 b/third_party/libxml/src/fuzz/seed/regexp/ranges-12 new file mode 100644 index 000000000000..9c3bc663b926 Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/ranges-12 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/ranges-2 b/third_party/libxml/src/fuzz/seed/regexp/ranges-2 new file mode 100644 index 000000000000..9369f7a513ce Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/ranges-2 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/ranges-3 b/third_party/libxml/src/fuzz/seed/regexp/ranges-3 new file mode 100644 index 000000000000..58a3a081d619 Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/ranges-3 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/ranges-4 b/third_party/libxml/src/fuzz/seed/regexp/ranges-4 
new file mode 100644 index 000000000000..da7e9dabd4ba Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/ranges-4 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/ranges-5 b/third_party/libxml/src/fuzz/seed/regexp/ranges-5 new file mode 100644 index 000000000000..83ad4a82a717 Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/ranges-5 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/ranges-6 b/third_party/libxml/src/fuzz/seed/regexp/ranges-6 new file mode 100644 index 000000000000..3bc9758f9a88 Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/ranges-6 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/ranges-7 b/third_party/libxml/src/fuzz/seed/regexp/ranges-7 new file mode 100644 index 000000000000..fa8903840b38 Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/ranges-7 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/ranges-8 b/third_party/libxml/src/fuzz/seed/regexp/ranges-8 new file mode 100644 index 000000000000..96f0bb690f24 Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/ranges-8 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/ranges-9 b/third_party/libxml/src/fuzz/seed/regexp/ranges-9 new file mode 100644 index 000000000000..8e3fc43a4eaa Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/ranges-9 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/ranges2-1 b/third_party/libxml/src/fuzz/seed/regexp/ranges2-1 new file mode 100644 index 000000000000..044a8eb93354 Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/ranges2-1 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/ranges2-10 b/third_party/libxml/src/fuzz/seed/regexp/ranges2-10 new file mode 100644 index 000000000000..19e2aa2db073 Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/ranges2-10 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/ranges2-11 
b/third_party/libxml/src/fuzz/seed/regexp/ranges2-11 new file mode 100644 index 000000000000..89be181da33c Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/ranges2-11 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/ranges2-12 b/third_party/libxml/src/fuzz/seed/regexp/ranges2-12 new file mode 100644 index 000000000000..42ebdd31742f Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/ranges2-12 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/ranges2-2 b/third_party/libxml/src/fuzz/seed/regexp/ranges2-2 new file mode 100644 index 000000000000..026f7b8476c6 Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/ranges2-2 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/ranges2-3 b/third_party/libxml/src/fuzz/seed/regexp/ranges2-3 new file mode 100644 index 000000000000..83e78a9c4d1c Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/ranges2-3 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/ranges2-4 b/third_party/libxml/src/fuzz/seed/regexp/ranges2-4 new file mode 100644 index 000000000000..847b4e8425d2 Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/ranges2-4 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/ranges2-5 b/third_party/libxml/src/fuzz/seed/regexp/ranges2-5 new file mode 100644 index 000000000000..349168d35846 Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/ranges2-5 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/ranges2-6 b/third_party/libxml/src/fuzz/seed/regexp/ranges2-6 new file mode 100644 index 000000000000..5d2a407610e4 Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/ranges2-6 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/ranges2-7 b/third_party/libxml/src/fuzz/seed/regexp/ranges2-7 new file mode 100644 index 000000000000..74fbafb4e214 Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/ranges2-7 differ diff --git 
a/third_party/libxml/src/fuzz/seed/regexp/ranges2-8 b/third_party/libxml/src/fuzz/seed/regexp/ranges2-8 new file mode 100644 index 000000000000..125bfa91fcaf Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/ranges2-8 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/ranges2-9 b/third_party/libxml/src/fuzz/seed/regexp/ranges2-9 new file mode 100644 index 000000000000..f2cf128831dc Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/ranges2-9 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/xpath-1 b/third_party/libxml/src/fuzz/seed/regexp/xpath-1 new file mode 100644 index 000000000000..3bc17926cc6a Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/xpath-1 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/xpath-10 b/third_party/libxml/src/fuzz/seed/regexp/xpath-10 new file mode 100644 index 000000000000..e4f4b0cdde3e Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/xpath-10 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/xpath-11 b/third_party/libxml/src/fuzz/seed/regexp/xpath-11 new file mode 100644 index 000000000000..318e0ccf39b4 Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/xpath-11 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/xpath-12 b/third_party/libxml/src/fuzz/seed/regexp/xpath-12 new file mode 100644 index 000000000000..f204295b26de Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/xpath-12 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/xpath-13 b/third_party/libxml/src/fuzz/seed/regexp/xpath-13 new file mode 100644 index 000000000000..70fccd591753 Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/xpath-13 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/xpath-14 b/third_party/libxml/src/fuzz/seed/regexp/xpath-14 new file mode 100644 index 000000000000..357ce2b5627e Binary files /dev/null and 
b/third_party/libxml/src/fuzz/seed/regexp/xpath-14 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/xpath-15 b/third_party/libxml/src/fuzz/seed/regexp/xpath-15 new file mode 100644 index 000000000000..2a10a8370296 Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/xpath-15 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/xpath-16 b/third_party/libxml/src/fuzz/seed/regexp/xpath-16 new file mode 100644 index 000000000000..1f3089fb974c Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/xpath-16 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/xpath-17 b/third_party/libxml/src/fuzz/seed/regexp/xpath-17 new file mode 100644 index 000000000000..a9d542fb6f1b Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/xpath-17 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/xpath-18 b/third_party/libxml/src/fuzz/seed/regexp/xpath-18 new file mode 100644 index 000000000000..651eb9d4c3a8 Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/xpath-18 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/xpath-19 b/third_party/libxml/src/fuzz/seed/regexp/xpath-19 new file mode 100644 index 000000000000..fefea8f1e6c1 Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/xpath-19 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/xpath-2 b/third_party/libxml/src/fuzz/seed/regexp/xpath-2 new file mode 100644 index 000000000000..81e5fba0a9f4 Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/xpath-2 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/xpath-20 b/third_party/libxml/src/fuzz/seed/regexp/xpath-20 new file mode 100644 index 000000000000..1f3089fb974c Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/xpath-20 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/xpath-21 b/third_party/libxml/src/fuzz/seed/regexp/xpath-21 new file mode 100644 index 000000000000..706a702576f1 Binary files 
/dev/null and b/third_party/libxml/src/fuzz/seed/regexp/xpath-21 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/xpath-22 b/third_party/libxml/src/fuzz/seed/regexp/xpath-22 new file mode 100644 index 000000000000..a246f84cc61c Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/xpath-22 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/xpath-23 b/third_party/libxml/src/fuzz/seed/regexp/xpath-23 new file mode 100644 index 000000000000..02753beb3e42 Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/xpath-23 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/xpath-24 b/third_party/libxml/src/fuzz/seed/regexp/xpath-24 new file mode 100644 index 000000000000..331105cdce1a Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/xpath-24 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/xpath-25 b/third_party/libxml/src/fuzz/seed/regexp/xpath-25 new file mode 100644 index 000000000000..ce3da4436759 Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/xpath-25 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/xpath-26 b/third_party/libxml/src/fuzz/seed/regexp/xpath-26 new file mode 100644 index 000000000000..b3bf8c23dd7c Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/xpath-26 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/xpath-27 b/third_party/libxml/src/fuzz/seed/regexp/xpath-27 new file mode 100644 index 000000000000..74bbe4680aec Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/xpath-27 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/xpath-28 b/third_party/libxml/src/fuzz/seed/regexp/xpath-28 new file mode 100644 index 000000000000..b38a709e791e Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/xpath-28 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/xpath-29 b/third_party/libxml/src/fuzz/seed/regexp/xpath-29 new file mode 100644 index 
000000000000..104d4e54a151 Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/xpath-29 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/xpath-3 b/third_party/libxml/src/fuzz/seed/regexp/xpath-3 new file mode 100644 index 000000000000..6d7be85f2d0e Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/xpath-3 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/xpath-30 b/third_party/libxml/src/fuzz/seed/regexp/xpath-30 new file mode 100644 index 000000000000..b681ff14ac56 Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/xpath-30 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/xpath-31 b/third_party/libxml/src/fuzz/seed/regexp/xpath-31 new file mode 100644 index 000000000000..cd87b0e8d35a Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/xpath-31 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/xpath-32 b/third_party/libxml/src/fuzz/seed/regexp/xpath-32 new file mode 100644 index 000000000000..c5cac32ab94e Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/xpath-32 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/xpath-33 b/third_party/libxml/src/fuzz/seed/regexp/xpath-33 new file mode 100644 index 000000000000..89e3fcdcc841 Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/xpath-33 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/xpath-34 b/third_party/libxml/src/fuzz/seed/regexp/xpath-34 new file mode 100644 index 000000000000..b65a3d6ff3a2 Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/xpath-34 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/xpath-35 b/third_party/libxml/src/fuzz/seed/regexp/xpath-35 new file mode 100644 index 000000000000..252a70c2eb2d Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/xpath-35 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/xpath-4 b/third_party/libxml/src/fuzz/seed/regexp/xpath-4 new file mode 
100644 index 000000000000..30718c579cb8 Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/xpath-4 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/xpath-5 b/third_party/libxml/src/fuzz/seed/regexp/xpath-5 new file mode 100644 index 000000000000..06ad88ef4e89 Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/xpath-5 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/xpath-6 b/third_party/libxml/src/fuzz/seed/regexp/xpath-6 new file mode 100644 index 000000000000..6678772837df Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/xpath-6 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/xpath-7 b/third_party/libxml/src/fuzz/seed/regexp/xpath-7 new file mode 100644 index 000000000000..e69ad856f119 Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/xpath-7 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/xpath-8 b/third_party/libxml/src/fuzz/seed/regexp/xpath-8 new file mode 100644 index 000000000000..a8120ccdcc33 Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/xpath-8 differ diff --git a/third_party/libxml/src/fuzz/seed/regexp/xpath-9 b/third_party/libxml/src/fuzz/seed/regexp/xpath-9 new file mode 100644 index 000000000000..c037ce7fbd8d Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/regexp/xpath-9 differ diff --git a/third_party/libxml/src/fuzz/seed/uri/dot b/third_party/libxml/src/fuzz/seed/uri/dot new file mode 100644 index 000000000000..945c9b46d684 --- /dev/null +++ b/third_party/libxml/src/fuzz/seed/uri/dot @@ -0,0 +1 @@ +. 
\ No newline at end of file diff --git a/third_party/libxml/src/fuzz/seed/uri/full b/third_party/libxml/src/fuzz/seed/uri/full new file mode 100644 index 000000000000..808e58a1522f Binary files /dev/null and b/third_party/libxml/src/fuzz/seed/uri/full differ diff --git a/third_party/libxml/src/fuzz/testFuzzer.c b/third_party/libxml/src/fuzz/testFuzzer.c new file mode 100644 index 000000000000..b0c7ffbc9b7c --- /dev/null +++ b/third_party/libxml/src/fuzz/testFuzzer.c @@ -0,0 +1,188 @@ +/* + * testFuzzer.c: Test program for the custom entity loader used to fuzz + * with multiple inputs. + * + * See Copyright for the status of this software. + */ + +#include +#include +#include +#include +#include +#include "fuzz.h" + +#ifdef HAVE_HTML_FUZZER + #define LLVMFuzzerInitialize fuzzHtmlInit + #define LLVMFuzzerTestOneInput fuzzHtml + #include "html.c" + #undef LLVMFuzzerInitialize + #undef LLVMFuzzerTestOneInput +#endif + +#ifdef HAVE_REGEXP_FUZZER + #define LLVMFuzzerInitialize fuzzRegexpInit + #define LLVMFuzzerTestOneInput fuzzRegexp + #include "regexp.c" + #undef LLVMFuzzerInitialize + #undef LLVMFuzzerTestOneInput +#endif + +#ifdef HAVE_SCHEMA_FUZZER + #define LLVMFuzzerInitialize fuzzSchemaInit + #define LLVMFuzzerTestOneInput fuzzSchema + #include "schema.c" + #undef LLVMFuzzerInitialize + #undef LLVMFuzzerTestOneInput +#endif + +#ifdef HAVE_URI_FUZZER + #define LLVMFuzzerInitialize fuzzUriInit + #define LLVMFuzzerTestOneInput fuzzUri + #include "uri.c" + #undef LLVMFuzzerInitialize + #undef LLVMFuzzerTestOneInput +#endif + +#ifdef HAVE_XML_FUZZER + #define LLVMFuzzerInitialize fuzzXmlInit + #define LLVMFuzzerTestOneInput fuzzXml + #include "xml.c" + #undef LLVMFuzzerInitialize + #undef LLVMFuzzerTestOneInput +#endif + +#ifdef HAVE_XPATH_FUZZER + #define LLVMFuzzerInitialize fuzzXPathInit + #define LLVMFuzzerTestOneInput fuzzXPath + #include "xpath.c" + #undef LLVMFuzzerInitialize + #undef LLVMFuzzerTestOneInput +#endif + +typedef int +(*initFunc)(int *argc, char 
***argv); +typedef int +(*fuzzFunc)(const char *data, size_t size); + +int numInputs; + +static int +testFuzzer(initFunc init, fuzzFunc fuzz, const char *pattern) { + glob_t globbuf; + int ret = -1; + int i; + + if (glob(pattern, 0, NULL, &globbuf) != 0) { + fprintf(stderr, "pattern %s matches no files\n", pattern); + return(-1); + } + + if (init != NULL) + init(NULL, NULL); + + for (i = 0; i < globbuf.gl_pathc; i++) { + const char *path = globbuf.gl_pathv[i]; + char *data; + size_t size; + + data = xmlSlurpFile(path, &size); + if (data == NULL) { + fprintf(stderr, "couldn't read %s\n", path); + goto error; + } + fuzz(data, size); + xmlFree(data); + + numInputs++; + } + + ret = 0; +error: + globfree(&globbuf); + return(ret); +} + +#ifdef HAVE_XML_FUZZER +static int +testEntityLoader() { + static const char data[] = + "doc.xml\\\n" + "\n" + "&ent;\\\n" + "doc.dtd\\\n" + "\n" + "\\\n" + "ent.txt\\\n" + "Hello, world!\\\n"; + static xmlChar expected[] = + "\n" + "\n" + "Hello, world!\n"; + const char *docBuffer; + size_t docSize; + xmlDocPtr doc; + xmlChar *out; + int ret = 0; + + xmlSetExternalEntityLoader(xmlFuzzEntityLoader); + + xmlFuzzDataInit(data, sizeof(data) - 1); + xmlFuzzReadEntities(); + docBuffer = xmlFuzzMainEntity(&docSize); + doc = xmlReadMemory(docBuffer, docSize, NULL, NULL, + XML_PARSE_NOENT | XML_PARSE_DTDLOAD); + + xmlDocDumpMemory(doc, &out, NULL); + if (xmlStrcmp(out, expected) != 0) { + fprintf(stderr, "Expected:\n%sGot:\n%s", expected, out); + ret = 1; + } + + xmlFree(out); + xmlFreeDoc(doc); + xmlFuzzDataCleanup(); + + return(ret); +} +#endif + +int +main() { + int ret = 0; + +#ifdef HAVE_XML_FUZZER + if (testEntityLoader() != 0) + ret = 1; +#endif +#ifdef HAVE_HTML_FUZZER + if (testFuzzer(fuzzHtmlInit, fuzzHtml, "seed/html/*") != 0) + ret = 1; +#endif +#ifdef HAVE_REGEXP_FUZZER + if (testFuzzer(fuzzRegexpInit, fuzzRegexp, "seed/regexp/*") != 0) + ret = 1; +#endif +#ifdef HAVE_SCHEMA_FUZZER + if (testFuzzer(fuzzSchemaInit, fuzzSchema, 
"seed/schema/*") != 0) + ret = 1; +#endif +#ifdef HAVE_URI_FUZZER + if (testFuzzer(NULL, fuzzUri, "seed/uri/*") != 0) + ret = 1; +#endif +#ifdef HAVE_XML_FUZZER + if (testFuzzer(fuzzXmlInit, fuzzXml, "seed/xml/*") != 0) + ret = 1; +#endif +#ifdef HAVE_XPATH_FUZZER + if (testFuzzer(fuzzXPathInit, fuzzXPath, "seed/xpath/*") != 0) + ret = 1; +#endif + + if (ret == 0) + printf("Successfully tested %d inputs\n", numInputs); + + return(ret); +} + diff --git a/third_party/libxml/src/fuzz/uri.c b/third_party/libxml/src/fuzz/uri.c new file mode 100644 index 000000000000..5e4c099cee50 --- /dev/null +++ b/third_party/libxml/src/fuzz/uri.c @@ -0,0 +1,48 @@ +/* + * uri.c: a libFuzzer target to test the URI module. + * + * See Copyright for the status of this software. + */ + +#include +#include "fuzz.h" + +int +LLVMFuzzerTestOneInput(const char *data, size_t size) { + xmlURIPtr uri; + char *str[2] = { NULL, NULL }; + size_t numStrings; + + if (size > 10000) + return(0); + + numStrings = xmlFuzzExtractStrings(data, size, str, 2); + + uri = xmlParseURI(str[0]); + xmlFree(xmlSaveUri(uri)); + xmlFreeURI(uri); + + uri = xmlParseURIRaw(str[0], 1); + xmlFree(xmlSaveUri(uri)); + xmlFreeURI(uri); + + xmlFree(xmlURIUnescapeString(str[0], -1, NULL)); + xmlFree(xmlURIEscape(BAD_CAST str[0])); + xmlFree(xmlCanonicPath(BAD_CAST str[0])); + xmlFree(xmlPathToURI(BAD_CAST str[0])); + + if (numStrings >= 2) { + xmlFree(xmlBuildURI(BAD_CAST str[1], BAD_CAST str[0])); + xmlFree(xmlBuildRelativeURI(BAD_CAST str[1], BAD_CAST str[0])); + xmlFree(xmlURIEscapeStr(BAD_CAST str[0], BAD_CAST str[1])); + } + + /* Modifies string, so must come last. 
*/ + xmlNormalizeURIPath(str[0]); + + xmlFree(str[0]); + xmlFree(str[1]); + + return 0; +} + diff --git a/third_party/libxml/src/fuzz/xml.c b/third_party/libxml/src/fuzz/xml.c new file mode 100644 index 000000000000..8b4c4efc53f8 --- /dev/null +++ b/third_party/libxml/src/fuzz/xml.c @@ -0,0 +1,104 @@ +/* + * xml.c: a libFuzzer target to test several XML parser interfaces. + * + * See Copyright for the status of this software. + */ + +#include +#include +#include +#include +#include +#include +#include "fuzz.h" + +int +LLVMFuzzerInitialize(int *argc ATTRIBUTE_UNUSED, + char ***argv ATTRIBUTE_UNUSED) { + xmlInitParser(); +#ifdef LIBXML_CATALOG_ENABLED + xmlInitializeCatalog(); +#endif + xmlSetGenericErrorFunc(NULL, xmlFuzzErrorFunc); + xmlSetExternalEntityLoader(xmlFuzzEntityLoader); + + return 0; +} + +int +LLVMFuzzerTestOneInput(const char *data, size_t size) { + static const size_t maxChunkSize = 128; + xmlDocPtr doc; + xmlParserCtxtPtr ctxt; + xmlTextReaderPtr reader; + xmlChar *out; + const char *docBuffer, *docUrl; + size_t maxSize, docSize, consumed, chunkSize; + int opts, outSize; + + xmlFuzzDataInit(data, size); + opts = xmlFuzzReadInt(); + + /* Lower maximum size when processing entities for now. */ + maxSize = opts & XML_PARSE_NOENT ? 50000 : 500000; + if (size > maxSize) + goto exit; + + xmlFuzzReadEntities(); + docBuffer = xmlFuzzMainEntity(&docSize); + docUrl = xmlFuzzMainUrl(); + if (docBuffer == NULL) + goto exit; + + /* Pull parser */ + + doc = xmlReadMemory(docBuffer, docSize, docUrl, NULL, opts); + if (opts & XML_PARSE_XINCLUDE) + xmlXIncludeProcessFlags(doc, opts); + /* Also test the serializer. 
*/ + xmlDocDumpMemory(doc, &out, &outSize); + xmlFree(out); + xmlFreeDoc(doc); + + /* Push parser */ + + ctxt = xmlCreatePushParserCtxt(NULL, NULL, NULL, 0, docUrl); + if (ctxt == NULL) + goto exit; + xmlCtxtUseOptions(ctxt, opts); + + for (consumed = 0; consumed < docSize; consumed += chunkSize) { + chunkSize = docSize - consumed; + if (chunkSize > maxChunkSize) + chunkSize = maxChunkSize; + xmlParseChunk(ctxt, docBuffer + consumed, chunkSize, 0); + } + + xmlParseChunk(ctxt, NULL, 0, 1); + if (opts & XML_PARSE_XINCLUDE) + xmlXIncludeProcessFlags(ctxt->myDoc, opts); + xmlFreeDoc(ctxt->myDoc); + xmlFreeParserCtxt(ctxt); + + /* Reader */ + + reader = xmlReaderForMemory(docBuffer, docSize, NULL, NULL, opts); + if (reader == NULL) + goto exit; + while (xmlTextReaderRead(reader) == 1) { + if (xmlTextReaderNodeType(reader) == XML_ELEMENT_NODE) { + int i, n = xmlTextReaderAttributeCount(reader); + for (i=0; i +#include +#include "fuzz.h" + +int +LLVMFuzzerInitialize(int *argc ATTRIBUTE_UNUSED, + char ***argv ATTRIBUTE_UNUSED) { + xmlInitParser(); + xmlSetGenericErrorFunc(NULL, xmlFuzzErrorFunc); + + return 0; +} + +int +LLVMFuzzerTestOneInput(const char *data, size_t size) { + xmlDocPtr doc; + const char *expr, *xml; + size_t exprSize, xmlSize; + + if (size > 10000) + return(0); + + xmlFuzzDataInit(data, size); + + expr = xmlFuzzReadString(&exprSize); + xml = xmlFuzzReadString(&xmlSize); + + /* Recovery mode allows more input to be fuzzed. 
*/ + doc = xmlReadMemory(xml, xmlSize, NULL, NULL, XML_PARSE_RECOVER); + if (doc != NULL) { + xmlXPathContextPtr xpctxt = xmlXPathNewContext(doc); + + /* Operation limit to avoid timeout */ + xpctxt->opLimit = 500000; + + xmlXPathFreeObject(xmlXPtrEval(BAD_CAST expr, xpctxt)); + xmlXPathFreeContext(xpctxt); + } + xmlFreeDoc(doc); + + xmlFuzzDataCleanup(); + xmlResetLastError(); + + return(0); +} + diff --git a/third_party/libxml/src/fuzz/xpath.dict b/third_party/libxml/src/fuzz/xpath.dict new file mode 100644 index 000000000000..4fe375fbcda6 --- /dev/null +++ b/third_party/libxml/src/fuzz/xpath.dict @@ -0,0 +1,94 @@ +# XML + +elem_a="" +elem_b="" +elem_c="" +elem_d="" +elem_empty="" +elem_ns_a="" +elem_ns_b="" + +attr_a=" a='a'" +attr_b=" b='b'" + +ns_decl=" xmlns:a='a'" +ns_default=" xmlns='a'" +ns_prefix_a="a:" +ns_prefix_b="b:" + +cdata_section="" + +comment="" + +pi="" + +# XPath + +axis_ancestor="ancestor::" +axis_ancestor_or_self="ancestor-or-self::" +axis_attribute="attribute::" +axis_attribute_abbrev="@" +axis_child="child::" +axis_descendant="descendant::" +axis_descendant_or_self="descendant-or-self::" +axis_following="following::" +axis_following_sibling="following-sibling::" +axis_namespace="namespace::" +axis_parent="parent::" +axis_preceding="preceding::" +axis_preceding_siblings="preceding-sibling::" +axis_self="self::" + +node_test_ns="a:" + +val_num="=(1.0)" +val_str_sq="=('a')" +val_str_dq="=(\"a\")" +val_node_set="=(*)" +val_elem="=(b)" + +step_root="/" +step_descendant="//" +step_any="//*" +step_any_l="*//" +step_elem="//b" +step_ns_elem="//a:a" +step_comment="//comment()" +step_node="//node()" +step_node_l="node()//" +step_pi="//processing-instruction()" +step_text="//text()" +step_parent="../" + +op_plus="+1" +op_minus=" - 1" +op_neg="-" +op_mul="*1" +op_div=" div 1" +op_mod=" mod 1" +op_and=" and 1" +op_or=" or 1" +op_ne="!=1" +op_lt="<1" +op_gt=">1" +op_le="<=1" +op_ge=">=1" +op_predicate_num="[1]" +op_predicate_last="[last()]" 
+op_predicate_str="['a']" +op_predicate="[1=1]" +op_arg_num=",1" +op_arg_str=",'a'" +op_arg_node=",*" +op_union="|//b" + +var_num="=$f" +var_bool="=$b" +var_str="=$s" +var_node_set="=$n" + +# Unicode + +utf8_2="\xC3\x84" +utf8_3="\xE2\x80\x9C" +utf8_4="\xF0\x9F\x98\x80" diff --git a/third_party/libxml/src/include/libxml/Makefile.am b/third_party/libxml/src/include/libxml/Makefile.am index cf9297aad66b..328c1800e8d3 100644 --- a/third_party/libxml/src/include/libxml/Makefile.am +++ b/third_party/libxml/src/include/libxml/Makefile.am @@ -51,4 +51,4 @@ xmlinc_HEADERS = \ xmlsave.h \ schematron.h -EXTRA_DIST = xmlversion.h.in +EXTRA_DIST = xmlversion.h.in xmlwin32version.h.in diff --git a/third_party/libxml/src/include/libxml/c14n.h b/third_party/libxml/src/include/libxml/c14n.h index d74847df8423..af93de63a4c3 100644 --- a/third_party/libxml/src/include/libxml/c14n.h +++ b/third_party/libxml/src/include/libxml/c14n.h @@ -16,17 +16,19 @@ */ #ifndef __XML_C14N_H__ #define __XML_C14N_H__ + +#include + #ifdef LIBXML_C14N_ENABLED #ifdef LIBXML_OUTPUT_ENABLED +#include +#include + #ifdef __cplusplus extern "C" { #endif /* __cplusplus */ -#include -#include -#include - /* * XML Canonicalization * http://www.w3.org/TR/xml-c14n diff --git a/third_party/libxml/src/include/libxml/parser.h b/third_party/libxml/src/include/libxml/parser.h index 3020b20c5009..0ba1c387a97d 100644 --- a/third_party/libxml/src/include/libxml/parser.h +++ b/third_party/libxml/src/include/libxml/parser.h @@ -169,6 +169,8 @@ typedef enum { XML_PARSE_READER = 5 } xmlParserMode; +typedef struct _xmlStartTag xmlStartTag; + /** * xmlParserCtxt: * @@ -231,7 +233,7 @@ struct _xmlParserCtxt { int nameMax; /* Max depth of the parsing stack */ const xmlChar * *nameTab; /* array of nodes */ - long nbChars; /* number of xmlChar processed */ + long nbChars; /* unused */ long checkIndex; /* used by progressive parsing lookup */ int keepBlanks; /* ugly but ... 
*/ int disableSAX; /* SAX callbacks are disabled */ @@ -280,7 +282,7 @@ struct _xmlParserCtxt { int nsMax; /* the size of the arrays */ const xmlChar * *nsTab; /* the array of prefix/namespace name */ int *attallocs; /* which attribute were allocated */ - void * *pushTab; /* array of data for push */ + xmlStartTag *pushTab; /* array of data for push */ xmlHashTablePtr attsDefault; /* defaulted attributes if any */ xmlHashTablePtr attsSpecial; /* non-CDATA attributes if any */ int nsWellFormed; /* is the document XML Namespace okay */ diff --git a/third_party/libxml/src/include/libxml/xmlexports.h b/third_party/libxml/src/include/libxml/xmlexports.h index 9cc3f105423f..aceede5ec050 100644 --- a/third_party/libxml/src/include/libxml/xmlexports.h +++ b/third_party/libxml/src/include/libxml/xmlexports.h @@ -3,43 +3,46 @@ * Description: macros for marking symbols as exportable/importable. * * Copy: See Copyright for the status of this software. - * - * Author: Igor Zlatovic */ #ifndef __XML_EXPORTS_H__ #define __XML_EXPORTS_H__ -/** - * XMLPUBFUN, XMLPUBVAR, XMLCALL - * - * Macros which declare an exportable function, an exportable variable and - * the calling convention used for functions. - * - * Please use an extra block for every platform/compiler combination when - * modifying this, rather than overlong #ifdef lines. This helps - * readability as well as the fact that different compilers on the same - * platform might need different definitions. 
- */ +#if defined(_WIN32) || defined(__CYGWIN__) +/** DOC_DISABLE */ + +#ifdef LIBXML_STATIC + #define XMLPUBLIC +#elif defined(IN_LIBXML) + #define XMLPUBLIC __declspec(dllexport) +#else + #define XMLPUBLIC __declspec(dllimport) +#endif + +#if defined(LIBXML_FASTCALL) + #define XMLCALL __fastcall +#else + #define XMLCALL __cdecl +#endif +#define XMLCDECL __cdecl + +/** DOC_ENABLE */ +#else /* not Windows */ /** - * XMLPUBFUN: - * - * Macros which declare an exportable function - */ -#define XMLPUBFUN -/** - * XMLPUBVAR: + * XMLPUBLIC: * - * Macros which declare an exportable variable + * Macro which declares a public symbol */ -#define XMLPUBVAR extern +#define XMLPUBLIC + /** * XMLCALL: * - * Macros which declare the called convention for exported functions + * Macro which declares the calling convention for exported functions */ #define XMLCALL + /** * XMLCDECL: * @@ -48,101 +51,21 @@ */ #define XMLCDECL -/** DOC_DISABLE */ - -/* Windows platform with MS compiler */ -#if defined(_WIN32) && defined(_MSC_VER) && !defined(__LB_XB1__) && \ - !defined(__LB_XB360__) - #undef XMLPUBFUN - #undef XMLPUBVAR - #undef XMLCALL - #undef XMLCDECL - #if defined(IN_LIBXML) && !defined(LIBXML_STATIC) - #define XMLPUBFUN __declspec(dllexport) - #define XMLPUBVAR __declspec(dllexport) - #else - #define XMLPUBFUN - #if !defined(LIBXML_STATIC) - #define XMLPUBVAR __declspec(dllimport) extern - #else - #define XMLPUBVAR extern - #endif - #endif - #if defined(LIBXML_FASTCALL) - #define XMLCALL __fastcall - #else - #define XMLCALL __cdecl - #endif - #define XMLCDECL __cdecl -#endif +#endif /* platform switch */ -/* Windows platform with Borland compiler */ -#if defined(_WIN32) && defined(__BORLANDC__) - #undef XMLPUBFUN - #undef XMLPUBVAR - #undef XMLCALL - #undef XMLCDECL - #if defined(IN_LIBXML) && !defined(LIBXML_STATIC) - #define XMLPUBFUN __declspec(dllexport) - #define XMLPUBVAR __declspec(dllexport) extern - #else - #define XMLPUBFUN - #if !defined(LIBXML_STATIC) - #define 
XMLPUBVAR __declspec(dllimport) extern - #else - #define XMLPUBVAR extern - #endif - #endif - #define XMLCALL __cdecl - #define XMLCDECL __cdecl -#endif - -/* Windows platform with GNU compiler (Mingw) */ -#if defined(_WIN32) && defined(__MINGW32__) - #undef XMLPUBFUN - #undef XMLPUBVAR - #undef XMLCALL - #undef XMLCDECL - /* - * if defined(IN_LIBXML) this raises problems on mingw with msys - * _imp__xmlFree listed as missing. Try to workaround the problem - * by also making that declaration when compiling client code. - */ - #if defined(IN_LIBXML) && !defined(LIBXML_STATIC) - #define XMLPUBFUN __declspec(dllexport) - #define XMLPUBVAR __declspec(dllexport) extern - #else - #define XMLPUBFUN - #if !defined(LIBXML_STATIC) - #define XMLPUBVAR __declspec(dllimport) extern - #else - #define XMLPUBVAR extern - #endif - #endif - #define XMLCALL __cdecl - #define XMLCDECL __cdecl -#endif +/* + * XMLPUBFUN: + * + * Macro which declares an exportable function + */ +#define XMLPUBFUN XMLPUBLIC -/* Cygwin platform (does not define _WIN32), GNU compiler */ -#if defined(__CYGWIN__) - #undef XMLPUBFUN - #undef XMLPUBVAR - #undef XMLCALL - #undef XMLCDECL - #if defined(IN_LIBXML) && !defined(LIBXML_STATIC) - #define XMLPUBFUN __declspec(dllexport) - #define XMLPUBVAR __declspec(dllexport) - #else - #define XMLPUBFUN - #if !defined(LIBXML_STATIC) - #define XMLPUBVAR __declspec(dllimport) extern - #else - #define XMLPUBVAR extern - #endif - #endif - #define XMLCALL __cdecl - #define XMLCDECL __cdecl -#endif +/** + * XMLPUBVAR: + * + * Macro which declares an exportable variable + */ +#define XMLPUBVAR XMLPUBLIC extern /* Compatibility */ #if !defined(LIBXML_DLL_IMPORT) diff --git a/third_party/libxml/src/include/libxml/xpath.h b/third_party/libxml/src/include/libxml/xpath.h index a4303ff1db83..539593fa506b 100644 --- a/third_party/libxml/src/include/libxml/xpath.h +++ b/third_party/libxml/src/include/libxml/xpath.h @@ -359,8 +359,6 @@ struct _xmlXPathContext { unsigned long 
opLimit; unsigned long opCount; int depth; - int maxDepth; - int maxParserDepth; }; /* diff --git a/third_party/libxml/src/libxml.m4 b/third_party/libxml/src/libxml.m4 index b5df9158d0bd..09de9fe2defa 100644 --- a/third_party/libxml/src/libxml.m4 +++ b/third_party/libxml/src/libxml.m4 @@ -1,4 +1,6 @@ # Configure paths for LIBXML2 +# Simon Josefsson 2020-02-12 +# Fix autoconf 2.70+ warnings # Mike Hommey 2004-06-19 # use CPPFLAGS instead of CFLAGS # Toshio Kuratomi 2001-04-21 @@ -58,7 +60,8 @@ dnl Now check if the installed libxml is sufficiently new. dnl (Also sanity checks the results of xml2-config to some extent) dnl rm -f conf.xmltest - AC_TRY_RUN([ + AC_RUN_IFELSE( + [AC_LANG_SOURCE([[ #include #include #include @@ -133,7 +136,7 @@ main() } return 1; } -],, no_xml=yes,[echo $ac_n "cross compiling; assumed OK... $ac_c"]) +]])],, no_xml=yes,[echo $ac_n "cross compiling; assumed OK... $ac_c"]) CPPFLAGS="$ac_save_CPPFLAGS" LIBS="$ac_save_LIBS" fi @@ -156,10 +159,11 @@ main() echo "*** Could not run libxml test program, checking why..." CPPFLAGS="$CPPFLAGS $XML_CPPFLAGS" LIBS="$LIBS $XML_LIBS" - AC_TRY_LINK([ + AC_LINK_IFELSE( + [AC_LANG_PROGRAM([[ #include #include -], [ LIBXML_TEST_VERSION; return 0;], +]], [[ LIBXML_TEST_VERSION; return 0;]])], [ echo "*** The test program compiled, but did not run. This usually means" echo "*** that the run-time linker is not finding LIBXML or finding the wrong" echo "*** version of LIBXML. If it is not finding LIBXML, you'll need to set your" diff --git a/third_party/libxml/src/libxml2-config.cmake.cmake.in b/third_party/libxml/src/libxml2-config.cmake.cmake.in new file mode 100644 index 000000000000..27586aab1539 --- /dev/null +++ b/third_party/libxml/src/libxml2-config.cmake.cmake.in @@ -0,0 +1,136 @@ +# libxml2-config.cmake +# -------------------- +# +# Libxml2 cmake module. +# This module sets the following variables: +# +# :: +# +# LIBXML2_INCLUDE_DIR - Directory where LibXml2 headers are located. 
+# LIBXML2_INCLUDE_DIRS - list of the include directories needed to use LibXml2. +# LIBXML2_LIBRARY - path to the LibXml2 library. +# LIBXML2_LIBRARIES - xml2 libraries to link against. +# LIBXML2_DEFINITIONS - the compiler switches required for using LibXml2. +# LIBXML2_VERSION_MAJOR - The major version of libxml2. +# LIBXML2_VERSION_MINOR - The minor version of libxml2. +# LIBXML2_VERSION_PATCH - The patch version of libxml2. +# LIBXML2_VERSION_STRING - version number as a string (ex: "2.3.4") +# LIBXML2_MODULES - whether libxml2 has dso support +# LIBXML2_XMLLINT_EXECUTABLE - path to the XML checking tool xmllint coming with LibXml2 + +include("${CMAKE_CURRENT_LIST_DIR}/libxml2-export.cmake") + +@PACKAGE_INIT@ + +set(LIBXML2_VERSION_MAJOR @LIBXML_MAJOR_VERSION@) +set(LIBXML2_VERSION_MINOR @LIBXML_MINOR_VERSION@) +set(LIBXML2_VERSION_PATCH @LIBXML_MICRO_VERSION@) +set(LIBXML2_VERSION_STRING "@VERSION@") +set(LIBXML2_INSTALL_PREFIX ${PACKAGE_PREFIX_DIR}) +set(LIBXML2_INCLUDE_DIR ${PACKAGE_PREFIX_DIR}/@CMAKE_INSTALL_INCLUDEDIR@/libxml2) +set(LIBXML2_LIBRARY_DIR ${PACKAGE_PREFIX_DIR}/@CMAKE_INSTALL_LIBDIR@) + +macro(select_library_location target basename) + if(TARGET ${target}) + foreach(property IN ITEMS IMPORTED_LOCATION IMPORTED_IMPLIB) + get_target_property(${basename}_${property}_DEBUG ${target} ${property}_DEBUG) + get_target_property(${basename}_${property}_MINSIZEREL ${target} ${property}_MINSIZEREL) + get_target_property(${basename}_${property}_RELEASE ${target} ${property}_RELEASE) + get_target_property(${basename}_${property}_RELWITHDEBINFO ${target} ${property}_RELWITHDEBINFO) + + if(${basename}_${property}_DEBUG AND ${basename}_${property}_RELEASE) + set(${basename}_LIBRARY debug ${${basename}_${property}_DEBUG} optimized ${${basename}_${property}_RELEASE}) + elseif(${basename}_${property}_DEBUG AND ${basename}_${property}_RELWITHDEBINFO) + set(${basename}_LIBRARY debug ${${basename}_${property}_DEBUG} optimized 
${${basename}_${property}_RELWITHDEBINFO}) + elseif(${basename}_${property}_DEBUG AND ${basename}_${property}_MINSIZEREL) + set(${basename}_LIBRARY debug ${${basename}_${property}_DEBUG} optimized ${${basename}_${property}_MINSIZEREL}) + elseif(${basename}_${property}_RELEASE) + set(${basename}_LIBRARY ${${basename}_${property}_RELEASE}) + elseif(${basename}_${property}_RELWITHDEBINFO) + set(${basename}_LIBRARY ${${basename}_${property}_RELWITHDEBINFO}) + elseif(${basename}_${property}_MINSIZEREL) + set(${basename}_LIBRARY ${${basename}_${property}_MINSIZEREL}) + elseif(${basename}_${property}_DEBUG) + set(${basename}_LIBRARY ${${basename}_${property}_DEBUG}) + endif() + endforeach() + endif() +endmacro() + +macro(select_executable_location target basename) + if(TARGET ${target}) + get_target_property(${basename}_IMPORTED_LOCATION_DEBUG ${target} IMPORTED_LOCATION_DEBUG) + get_target_property(${basename}_IMPORTED_LOCATION_MINSIZEREL ${target} IMPORTED_LOCATION_MINSIZEREL) + get_target_property(${basename}_IMPORTED_LOCATION_RELEASE ${target} IMPORTED_LOCATION_RELEASE) + get_target_property(${basename}_IMPORTED_LOCATION_RELWITHDEBINFO ${target} IMPORTED_LOCATION_RELWITHDEBINFO) + + if(${basename}_IMPORTED_LOCATION_RELEASE) + set(${basename}_EXECUTABLE ${${basename}_IMPORTED_LOCATION_RELEASE}) + elseif(${basename}_IMPORTED_LOCATION_RELWITHDEBINFO) + set(${basename}_EXECUTABLE ${${basename}_IMPORTED_LOCATION_RELWITHDEBINFO}) + elseif(${basename}_IMPORTED_LOCATION_MINSIZEREL) + set(${basename}_EXECUTABLE ${${basename}_IMPORTED_LOCATION_MINSIZEREL}) + elseif(${basename}_IMPORTED_LOCATION_DEBUG) + set(${basename}_EXECUTABLE ${${basename}_IMPORTED_LOCATION_DEBUG}) + endif() + endif() +endmacro() + +select_library_location(LibXml2::LibXml2 LIBXML2) +select_executable_location(LibXml2::xmlcatalog LIBXML2_XMLCATALOG) +select_executable_location(LibXml2::xmllint LIBXML2_XMLLINT) + +set(LIBXML2_LIBRARIES ${LIBXML2_LIBRARY}) +set(LIBXML2_INCLUDE_DIRS ${LIBXML2_INCLUDE_DIR}) + 
+include(CMakeFindDependencyMacro) + +set(LIBXML2_SHARED @BUILD_SHARED_LIBS@) +set(LIBXML2_WITH_ICONV @LIBXML2_WITH_ICONV@) +set(LIBXML2_WITH_THREADS @LIBXML2_WITH_THREADS@) +set(LIBXML2_WITH_ICU @LIBXML2_WITH_ICU@) +set(LIBXML2_WITH_LZMA @LIBXML2_WITH_LZMA@) +set(LIBXML2_WITH_ZLIB @LIBXML2_WITH_ZLIB@) + +if(LIBXML2_WITH_ICONV) + find_dependency(Iconv) + list(APPEND LIBXML2_LIBRARIES ${Iconv_LIBRARIES}) + list(APPEND LIBXML2_INCLUDE_DIRS ${Iconv_INCLUDE_DIRS}) +endif() + +if(NOT LIBXML2_SHARED) + set(LIBXML2_DEFINITIONS -DLIBXML_STATIC) + + if(LIBXML2_WITH_THREADS) + find_dependency(Threads) + list(APPEND LIBXML2_LIBRARIES ${CMAKE_THREAD_LIBS_INIT}) + endif() + + if(LIBXML2_WITH_ICU) + find_dependency(ICU COMPONENTS data i18n uc) + list(APPEND LIBXML2_LIBRARIES ${ICU_LIBRARIES}) + endif() + + if(LIBXML2_WITH_LZMA) + find_dependency(LibLZMA) + list(APPEND LIBXML2_LIBRARIES ${LIBLZMA_LIBRARIES}) + endif() + + if(LIBXML2_WITH_ZLIB) + find_dependency(ZLIB) + list(APPEND LIBXML2_LIBRARIES ${ZLIB_LIBRARIES}) + endif() + + if(UNIX) + list(APPEND LIBXML2_LIBRARIES m) + endif() + + if(WIN32) + list(APPEND LIBXML2_LIBRARIES ws2_32) + endif() +endif() + +# whether libxml2 has dso support +set(LIBXML2_MODULES @LIBXML2_WITH_MODULES@) + +mark_as_advanced(LIBXML2_LIBRARY LIBXML2_XMLCATALOG_EXECUTABLE LIBXML2_XMLLINT_EXECUTABLE) diff --git a/third_party/libxml/src/libxml2-config.cmake.in b/third_party/libxml/src/libxml2-config.cmake.in index 6b16fc2e0420..49896900fdbb 100644 --- a/third_party/libxml/src/libxml2-config.cmake.in +++ b/third_party/libxml/src/libxml2-config.cmake.in @@ -2,17 +2,26 @@ # -------------------- # # Libxml2 cmake module. -# THis module sets the following variables: +# This module sets the following variables: # # :: # -# LIBXML2_INCLUDE_DIRS - Directory where libxml2 headers are located. -# LIBXML2_LIBRARIES - xml2 libraries to link against. -# LIBXML2_VERSION_MAJOR - The major version of libxml2. -# LIBXML2_VERSION_MINOR - The minor version of libxml2. 
-# LIBXML2_VERSION_PATCH - The patch version of libxml2. -# LIBXML2_VERSION_STRING - version number as a string (ex: "2.3.4") -# LIBXML2_MODULES - whether libxml2 as dso support +# LIBXML2_INCLUDE_DIR - Directory where LibXml2 headers are located. +# LIBXML2_INCLUDE_DIRS - list of the include directories needed to use LibXml2. +# LIBXML2_LIBRARY - path to the LibXml2 library. +# LIBXML2_LIBRARIES - xml2 libraries to link against. +# LIBXML2_DEFINITIONS - the compiler switches required for using LibXml2. +# LIBXML2_VERSION_MAJOR - The major version of libxml2. +# LIBXML2_VERSION_MINOR - The minor version of libxml2. +# LIBXML2_VERSION_PATCH - The patch version of libxml2. +# LIBXML2_VERSION_STRING - version number as a string (ex: "2.3.4") +# LIBXML2_MODULES - whether libxml2 has dso support +# LIBXML2_XMLLINT_EXECUTABLE - path to the XML checking tool xmllint coming with LibXml2 +# +# The following targets are defined: +# +# LibXml2::LibXml2 - the LibXml2 library +# LibXml2::xmllint - the xmllint command-line executable get_filename_component(_libxml2_rootdir ${CMAKE_CURRENT_LIST_DIR}/../../../ ABSOLUTE) @@ -20,31 +29,87 @@ set(LIBXML2_VERSION_MAJOR @LIBXML_MAJOR_VERSION@) set(LIBXML2_VERSION_MINOR @LIBXML_MINOR_VERSION@) set(LIBXML2_VERSION_MICRO @LIBXML_MICRO_VERSION@) set(LIBXML2_VERSION_STRING "@VERSION@") +set(LIBXML2_DEFINITIONS "@XML_CFLAGS@") set(LIBXML2_INSTALL_PREFIX ${_libxml2_rootdir}) -set(LIBXML2_INCLUDE_DIRS ${_libxml2_rootdir}/include ${_libxml2_rootdir}/include/libxml2) +set(LIBXML2_INCLUDE_DIR ${_libxml2_rootdir}/include/libxml2) set(LIBXML2_LIBRARY_DIR ${_libxml2_rootdir}/lib) -set(LIBXML2_LIBRARIES -L${LIBXML2_LIBRARY_DIR} -lxml2) -if(@WITH_THREADS@) - find_package(Threads REQUIRED) - list(APPEND LIBXML2_LIBRARIES ${CMAKE_THREAD_LIBS_INIT}) +find_library(LIBXML2_LIBRARY NAMES xml2 HINTS ${LIBXML2_LIBRARY_DIR} NO_DEFAULT_PATH) +find_program(LIBXML2_XMLCATALOG_EXECUTABLE NAMES xmlcatalog HINTS ${_libxml2_rootdir}/bin NO_DEFAULT_PATH) 
+find_program(LIBXML2_XMLLINT_EXECUTABLE NAMES xmllint HINTS ${_libxml2_rootdir}/bin NO_DEFAULT_PATH) + +set(LIBXML2_LIBRARIES ${LIBXML2_LIBRARY}) +set(LIBXML2_INCLUDE_DIRS ${LIBXML2_INCLUDE_DIR}) +unset(LIBXML2_INTERFACE_LINK_LIBRARIES) + +include(CMakeFindDependencyMacro) + +set(LIBXML2_WITH_ICONV @WITH_ICONV@) +set(LIBXML2_WITH_THREADS @WITH_THREADS@) +set(LIBXML2_WITH_ICU @WITH_ICU@) +set(LIBXML2_WITH_LZMA @WITH_LZMA@) +set(LIBXML2_WITH_ZLIB @WITH_ZLIB@) + +if(LIBXML2_WITH_ICONV) + find_dependency(Iconv) + list(APPEND LIBXML2_LIBRARIES ${Iconv_LIBRARIES}) + list(APPEND LIBXML2_INCLUDE_DIRS ${Iconv_INCLUDE_DIRS}) + list(APPEND LIBXML2_INTERFACE_LINK_LIBRARIES "Iconv::Iconv") +endif() + +if(LIBXML2_WITH_THREADS) + find_dependency(Threads) + list(APPEND LIBXML2_LIBRARIES ${CMAKE_THREAD_LIBS_INIT}) + list(APPEND LIBXML2_INTERFACE_LINK_LIBRARIES "\$") +endif() + +if(LIBXML2_WITH_ICU) + find_dependency(ICU COMPONENTS data i18n uc) + list(APPEND LIBXML2_LIBRARIES ${ICU_LIBRARIES}) + list(APPEND LIBXML2_INTERFACE_LINK_LIBRARIES "\$;\$;\$") endif() -if(@WITH_LZMA@) - find_package(LibLZMA REQUIRED) +if(LIBXML2_WITH_LZMA) + find_dependency(LibLZMA) list(APPEND LIBXML2_LIBRARIES ${LIBLZMA_LIBRARIES}) - list(APPEND LIBXML2_INCLUDE_DIRS ${LIBLZMA_INCLUDE_DIRS}) + list(APPEND LIBXML2_INTERFACE_LINK_LIBRARIES "\$") endif() -if(@WITH_ZLIB@) - find_package(ZLIB REQUIRED) +if(LIBXML2_WITH_ZLIB) + find_dependency(ZLIB) list(APPEND LIBXML2_LIBRARIES ${ZLIB_LIBRARIES}) - list(APPEND LIBXML2_INCLUDE_DIRS ${ZLIB_INCLUDE_DIRS}) + list(APPEND LIBXML2_INTERFACE_LINK_LIBRARIES "\$") endif() -list(APPEND LIBXML2_LIBRARIES @ICU_LIBS@ @ICONV_LIBS@ @M_LIBS@ @WIN32_EXTRA_LIBADD@ @LIBS@) +if(UNIX) + list(APPEND LIBXML2_LIBRARIES m) + list(APPEND LIBXML2_INTERFACE_LINK_LIBRARIES "\$") +endif() + +if(WIN32) + list(APPEND LIBXML2_LIBRARIES ws2_32) + list(APPEND LIBXML2_INTERFACE_LINK_LIBRARIES "\$") +endif() # whether libxml2 has dso support set(LIBXML2_MODULES @WITH_MODULES@) -mark_as_advanced( 
LIBXML2_INCLUDE_DIRS LIBXML2_LIBRARIES ) +mark_as_advanced(LIBXML2_LIBRARY LIBXML2_XMLCATALOG_EXECUTABLE LIBXML2_XMLLINT_EXECUTABLE) + +if(NOT TARGET LibXml2::LibXml2 AND DEFINED LIBXML2_LIBRARY AND DEFINED LIBXML2_INCLUDE_DIRS) + add_library(LibXml2::LibXml2 UNKNOWN IMPORTED) + set_target_properties(LibXml2::LibXml2 PROPERTIES IMPORTED_LOCATION "${LIBXML2_LIBRARY}") + set_target_properties(LibXml2::LibXml2 PROPERTIES INTERFACE_COMPILE_OPTIONS "${LIBXML2_DEFINITIONS}") + set_target_properties(LibXml2::LibXml2 PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${LIBXML2_INCLUDE_DIRS}") + set_target_properties(LibXml2::LibXml2 PROPERTIES INTERFACE_LINK_LIBRARIES "${LIBXML2_INTERFACE_LINK_LIBRARIES}") +endif() + +if(NOT TARGET LibXml2::xmlcatalog AND DEFINED LIBXML2_XMLCATALOG_EXECUTABLE) + add_executable(LibXml2::xmlcatalog IMPORTED) + set_target_properties(LibXml2::xmlcatalog PROPERTIES IMPORTED_LOCATION "${LIBXML2_XMLCATALOG_EXECUTABLE}") +endif() + +if(NOT TARGET LibXml2::xmllint AND DEFINED LIBXML2_XMLLINT_EXECUTABLE) + add_executable(LibXml2::xmllint IMPORTED) + set_target_properties(LibXml2::xmllint PROPERTIES IMPORTED_LOCATION "${LIBXML2_XMLLINT_EXECUTABLE}") +endif() diff --git a/third_party/libxml/src/libxml2.spec b/third_party/libxml/src/libxml2.spec index d2a5ffb307d7..b3eca4f01d12 100644 --- a/third_party/libxml/src/libxml2.spec +++ b/third_party/libxml/src/libxml2.spec @@ -2,7 +2,7 @@ Summary: Library providing XML and HTML support Name: libxml2 -Version: 2.9.10 +Version: 2.9.12 Release: 1%{?dist}%{?extra_release} License: MIT Group: Development/Libraries @@ -204,6 +204,6 @@ rm -fr %{buildroot} %endif # with_python3 %changelog -* Fri Apr 10 2020 Daniel Veillard -- upstream release 2.9.10 see http://xmlsoft.org/news.html +* Tue Aug 3 2021 Daniel Veillard +- upstream release 2.9.12 see http://xmlsoft.org/news.html diff --git a/third_party/libxml/src/parser.c b/third_party/libxml/src/parser.c index 87d9e619753d..d5b72e4fc591 100644 --- 
a/third_party/libxml/src/parser.c +++ b/third_party/libxml/src/parser.c @@ -87,6 +87,13 @@ #include "buf.h" #include "enc.h" +struct _xmlStartTag { + const xmlChar *prefix; + const xmlChar *URI; + int line; + int nsNr; +}; + static void xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info); @@ -96,6 +103,12 @@ xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID, static void xmlHaltParser(xmlParserCtxtPtr ctxt); +static int +xmlParseElementStart(xmlParserCtxtPtr ctxt); + +static void +xmlParseElementEnd(xmlParserCtxtPtr ctxt); + /************************************************************************ * * * Arbitrary limits set in the parser. See XML_PARSE_HUGE * @@ -127,6 +140,7 @@ xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size, xmlEntityPtr ent, size_t replacement) { size_t consumed = 0; + int i; if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE)) return (0); @@ -171,6 +185,28 @@ xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size, rep = NULL; } } + + /* + * Prevent entity exponential check, not just replacement while + * parsing the DTD + * The check is potentially costly so do that only once in a thousand + */ + if ((ctxt->instate == XML_PARSER_DTD) && (ctxt->nbentities > 10000) && + (ctxt->nbentities % 1024 == 0)) { + for (i = 0;i < ctxt->inputNr;i++) { + consumed += ctxt->inputTab[i]->consumed + + (ctxt->inputTab[i]->cur - ctxt->inputTab[i]->base); + } + if (ctxt->nbentities > consumed * XML_PARSER_NON_LINEAR) { + xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); + ctxt->instate = XML_PARSER_EOF; + return (1); + } + consumed = 0; + } + + + if (replacement != 0) { if (replacement < XML_MAX_TEXT_LENGTH) return(0); @@ -1074,11 +1110,15 @@ xmlHasFeature(xmlFeature feature) */ static void xmlDetectSAX2(xmlParserCtxtPtr ctxt) { + xmlSAXHandlerPtr sax; if (ctxt == NULL) return; + sax = ctxt->sax; #ifdef LIBXML_SAX1_ENABLED - if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) && - 
((ctxt->sax->startElementNs != NULL) || - (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1; + if ((sax) && (sax->initialized == XML_SAX2_MAGIC) && + ((sax->startElementNs != NULL) || + (sax->endElementNs != NULL) || + ((sax->startElement == NULL) && (sax->endElement == NULL)))) + ctxt->sax2 = 1; #else ctxt->sax2 = 1; #endif /* LIBXML_SAX1_ENABLED */ @@ -1829,13 +1869,14 @@ nodePop(xmlParserCtxtPtr ctxt) return (ret); } -#ifdef LIBXML_PUSH_ENABLED /** * nameNsPush: * @ctxt: an XML parser context * @value: the element name * @prefix: the element prefix * @URI: the element namespace name + * @line: the current line number for error messages + * @nsNr: the number of namespaces pushed on the namespace table * * Pushes a new element name/prefix/URL on top of the name stack * @@ -1843,11 +1884,13 @@ nodePop(xmlParserCtxtPtr ctxt) */ static int nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value, - const xmlChar *prefix, const xmlChar *URI, int nsNr) + const xmlChar *prefix, const xmlChar *URI, int line, int nsNr) { + xmlStartTag *tag; + if (ctxt->nameNr >= ctxt->nameMax) { const xmlChar * *tmp; - void **tmp2; + xmlStartTag *tmp2; ctxt->nameMax *= 2; tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab, ctxt->nameMax * @@ -1857,25 +1900,33 @@ nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value, goto mem_error; } ctxt->nameTab = tmp; - tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab, - ctxt->nameMax * 3 * + tmp2 = (xmlStartTag *) xmlRealloc((void * *)ctxt->pushTab, + ctxt->nameMax * sizeof(ctxt->pushTab[0])); if (tmp2 == NULL) { ctxt->nameMax /= 2; goto mem_error; } ctxt->pushTab = tmp2; + } else if (ctxt->pushTab == NULL) { + ctxt->pushTab = (xmlStartTag *) xmlMalloc(ctxt->nameMax * + sizeof(ctxt->pushTab[0])); + if (ctxt->pushTab == NULL) + goto mem_error; } ctxt->nameTab[ctxt->nameNr] = value; ctxt->name = value; - ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix; - ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI; - ctxt->pushTab[ctxt->nameNr 
* 3 + 2] = (void *) (ptrdiff_t) nsNr; + tag = &ctxt->pushTab[ctxt->nameNr]; + tag->prefix = prefix; + tag->URI = URI; + tag->line = line; + tag->nsNr = nsNr; return (ctxt->nameNr++); mem_error: xmlErrMemory(ctxt, NULL); return (-1); } +#ifdef LIBXML_PUSH_ENABLED /** * nameNsPop: * @ctxt: an XML parser context @@ -2051,7 +2102,7 @@ static int spacePop(xmlParserCtxtPtr ctxt) { ((unsigned char *) s)[ 9 ] == c10 ) #define SKIP(val) do { \ - ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val); \ + ctxt->input->cur += (val),ctxt->input->col+=(val); \ if (*ctxt->input->cur == 0) \ xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \ } while (0) @@ -2062,7 +2113,6 @@ static int spacePop(xmlParserCtxtPtr ctxt) { if (*(ctxt->input->cur) == '\n') { \ ctxt->input->line++; ctxt->input->col = 1; \ } else ctxt->input->col++; \ - ctxt->nbChars++; \ ctxt->input->cur++; \ } \ if (*ctxt->input->cur == 0) \ @@ -2115,7 +2165,6 @@ static void xmlGROW (xmlParserCtxtPtr ctxt) { #define NEXT1 { \ ctxt->input->col++; \ ctxt->input->cur++; \ - ctxt->nbChars++; \ if (*ctxt->input->cur == 0) \ xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \ } @@ -2152,7 +2201,7 @@ xmlSkipBlankChars(xmlParserCtxtPtr ctxt) { * It's Okay to use CUR/NEXT here since all the blanks are on * the ASCII range. 
*/ - if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) { + if (ctxt->instate != XML_PARSER_DTD) { const xmlChar *cur; /* * if we are in the document content, go really fast @@ -2328,7 +2377,6 @@ xmlParseCharRef(xmlParserCtxtPtr ctxt) { if (RAW == ';') { /* on purpose to avoid reentrancy problems with NEXT and SKIP */ ctxt->input->col++; - ctxt->nbChars ++; ctxt->input->cur++; } } else if ((RAW == '&') && (NXT(1) == '#')) { @@ -2357,7 +2405,6 @@ xmlParseCharRef(xmlParserCtxtPtr ctxt) { if (RAW == ';') { /* on purpose to avoid reentrancy problems with NEXT and SKIP */ ctxt->input->col++; - ctxt->nbChars ++; ctxt->input->cur++; } } else { @@ -2680,8 +2727,10 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, rep = xmlStringDecodeEntities(ctxt, ent->content, what, 0, 0, 0); ctxt->depth--; - if (rep == NULL) + if (rep == NULL) { + ent->content[0] = 0; goto int_error; + } current = rep; while (*current != 0) { /* non input consuming loop */ @@ -2736,8 +2785,11 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, rep = xmlStringDecodeEntities(ctxt, ent->content, what, 0, 0, 0); ctxt->depth--; - if (rep == NULL) + if (rep == NULL) { + if (ent->content != NULL) + ent->content[0] = 0; goto int_error; + } current = rep; while (*current != 0) { /* non input consuming loop */ buffer[nbchars++] = *current++; @@ -3329,7 +3381,6 @@ xmlParseName(xmlParserCtxtPtr ctxt) { } ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count); ctxt->input->cur = in; - ctxt->nbChars += count; ctxt->input->col += count; if (ret == NULL) xmlErrMemory(ctxt, NULL); @@ -3452,7 +3503,6 @@ xmlParseNCName(xmlParserCtxtPtr ctxt) { } ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count); ctxt->input->cur = in; - ctxt->nbChars += count; ctxt->input->col += count; if (ret == NULL) { xmlErrMemory(ctxt, NULL); @@ -3489,10 +3539,10 @@ xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) { while (*in != 0 && *in == *cmp) { 
++in; ++cmp; - ctxt->input->col++; } if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) { /* success */ + ctxt->input->col += in - ctxt->input->cur; ctxt->input->cur = in; return (const xmlChar*) 1; } @@ -4202,6 +4252,7 @@ xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) { } count++; if (count > 50) { + SHRINK; GROW; count = 0; if (ctxt->instate == XML_PARSER_EOF) { @@ -4289,6 +4340,7 @@ xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) { buf[len++] = cur; count++; if (count > 50) { + SHRINK; GROW; count = 0; if (ctxt->instate == XML_PARSER_EOF) { @@ -4504,7 +4556,7 @@ xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) { if (ctxt->instate == XML_PARSER_EOF) return; in = ctxt->input->cur; - } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09)); + } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a)); nbchar = 0; } ctxt->input->line = line; @@ -4569,6 +4621,7 @@ xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) { } count++; if (count > 50) { + SHRINK; GROW; count = 0; if (ctxt->instate == XML_PARSER_EOF) @@ -4774,6 +4827,7 @@ xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, count++; if (count > 50) { + SHRINK; GROW; count = 0; if (ctxt->instate == XML_PARSER_EOF) { @@ -4985,7 +5039,7 @@ xmlParseComment(xmlParserCtxtPtr ctxt) { ctxt->input->col++; goto get_more; } - } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09)); + } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a)); xmlParseCommentComplex(ctxt, buf, len, size); ctxt->instate = state; return; @@ -5184,6 +5238,7 @@ xmlParsePI(xmlParserCtxtPtr ctxt) { } count++; if (count > 50) { + SHRINK; GROW; if (ctxt->instate == XML_PARSER_EOF) { xmlFree(buf); @@ -6080,14 +6135,20 @@ xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) { NEXT; if (elem == NULL) { ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR); - if (ret == NULL) return(NULL); + if (ret == NULL) { + xmlFreeDocElementContent(ctxt->myDoc, cur); + 
return(NULL); + } ret->c1 = cur; if (cur != NULL) cur->parent = ret; cur = ret; } else { n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR); - if (n == NULL) return(NULL); + if (n == NULL) { + xmlFreeDocElementContent(ctxt->myDoc, ret); + return(NULL); + } n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT); if (n->c1 != NULL) n->c1->parent = n; @@ -6190,6 +6251,8 @@ xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk, SKIP_BLANKS; cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, depth + 1); + if (cur == NULL) + return(NULL); SKIP_BLANKS; GROW; } else { @@ -6323,6 +6386,11 @@ xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk, SKIP_BLANKS; last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, depth + 1); + if (last == NULL) { + if (ret != NULL) + xmlFreeDocElementContent(ctxt->myDoc, ret); + return(NULL); + } SKIP_BLANKS; } else { elem = xmlParseName(ctxt); @@ -6848,6 +6916,7 @@ void xmlParseTextDecl(xmlParserCtxtPtr ctxt) { xmlChar *version; const xmlChar *encoding; + int oldstate; /* * We know that 'instate; + ctxt->instate = XML_PARSER_START; + if (SKIP_BLANKS == 0) { xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Space needed after 'instate = oldstate; return; } if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) { @@ -6905,6 +6979,8 @@ xmlParseTextDecl(xmlParserCtxtPtr ctxt) { MOVETO_ENDTAG(CUR_PTR); NEXT; } + + ctxt->instate = oldstate; } /** @@ -7147,6 +7223,7 @@ xmlParseReference(xmlParserCtxtPtr ctxt) { ent->checked |= 1; if (ret == XML_ERR_ENTITY_LOOP) { xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); + xmlHaltParser(ctxt); xmlFreeNodeList(list); return; } @@ -7927,6 +8004,9 @@ xmlParsePEReference(xmlParserCtxtPtr ctxt) xmlChar start[4]; xmlCharEncoding enc; + if (xmlParserEntityCheck(ctxt, 0, entity, 0)) + return; + if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && ((ctxt->options & XML_PARSE_NOENT) == 0) && ((ctxt->options & 
XML_PARSE_DTDVALID) == 0) && @@ -8825,6 +8905,7 @@ xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name, } if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) { /* success */ + ctxt->input->col += in - ctxt->input->cur; ctxt->input->cur = in; return((const xmlChar*) 1); } @@ -9625,10 +9706,8 @@ xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref, */ static void -xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix, - const xmlChar *URI, int line, int nsNr, int tlen) { +xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) { const xmlChar *name; - size_t curLength; GROW; if ((RAW != '<') || (NXT(1) != '/')) { @@ -9637,24 +9716,10 @@ xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix, } SKIP(2); - curLength = ctxt->input->end - ctxt->input->cur; - if ((tlen > 0) && (curLength >= (size_t)tlen) && - (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) { - if ((curLength >= (size_t)(tlen + 1)) && - (ctxt->input->cur[tlen] == '>')) { - ctxt->input->cur += tlen + 1; - ctxt->input->col += tlen + 1; - goto done; - } - ctxt->input->cur += tlen; - ctxt->input->col += tlen; - name = (xmlChar*)1; - } else { - if (prefix == NULL) - name = xmlParseNameAndCompare(ctxt, ctxt->name); - else - name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix); - } + if (tag->prefix == NULL) + name = xmlParseNameAndCompare(ctxt, ctxt->name); + else + name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix); /* * We should definitely be at the ending "S? 
'>'" part @@ -9676,25 +9741,22 @@ xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix, */ if (name != (xmlChar*)1) { if (name == NULL) name = BAD_CAST "unparsable"; - if ((line == 0) && (ctxt->node != NULL)) - line = ctxt->node->line; xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH, "Opening and ending tag mismatch: %s line %d and %s\n", - ctxt->name, line, name); + ctxt->name, tag->line, name); } /* * SAX: End of Tag */ -done: if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) && (!ctxt->disableSAX)) - ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI); + ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix, + tag->URI); spacePop(ctxt); - if (nsNr != 0) - nsPop(ctxt, nsNr); - return; + if (tag->nsNr != 0) + nsPop(ctxt, tag->nsNr); } /** @@ -9776,6 +9838,7 @@ xmlParseCDSect(xmlParserCtxtPtr ctxt) { sl = l; count++; if (count > 50) { + SHRINK; GROW; if (ctxt->instate == XML_PARSER_EOF) { xmlFree(buf); @@ -9809,19 +9872,19 @@ xmlParseCDSect(xmlParserCtxtPtr ctxt) { } /** - * xmlParseContent: + * xmlParseContentInternal: * @ctxt: an XML parser context * - * Parse a content: - * - * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* + * Parse a content sequence. Stops at EOF or 'nameNr; + GROW; while ((RAW != 0) && - ((RAW != '<') || (NXT(1) != '/')) && (ctxt->instate != XML_PARSER_EOF)) { const xmlChar *test = CUR_PTR; unsigned int cons = ctxt->input->consumed; @@ -9855,7 +9918,13 @@ xmlParseContent(xmlParserCtxtPtr ctxt) { * Fourth case : a sub-element. */ else if (*cur == '<') { - xmlParseElement(ctxt); + if (NXT(1) == '/') { + if (ctxt->nameNr <= nameNr) + break; + xmlParseElementEnd(ctxt); + } else { + xmlParseElementStart(ctxt); + } } /* @@ -9886,11 +9955,35 @@ xmlParseContent(xmlParserCtxtPtr ctxt) { } } +/** + * xmlParseContent: + * @ctxt: an XML parser context + * + * Parse a content sequence. 
Stops at EOF or 'nameNr; + + xmlParseContentInternal(ctxt); + + if ((ctxt->instate != XML_PARSER_EOF) && (ctxt->nameNr > nameNr)) { + const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1]; + int line = ctxt->pushTab[ctxt->nameNr - 1].line; + xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED, + "Premature end of data in tag %s line %d\n", + name, line, NULL); + } +} + /** * xmlParseElement: * @ctxt: an XML parser context * - * parse an XML element, this is highly recursive + * parse an XML element * * [39] element ::= EmptyElemTag | STag content ETag * @@ -9902,6 +9995,34 @@ xmlParseContent(xmlParserCtxtPtr ctxt) { void xmlParseElement(xmlParserCtxtPtr ctxt) { + if (xmlParseElementStart(ctxt) != 0) + return; + + xmlParseContentInternal(ctxt); + if (ctxt->instate == XML_PARSER_EOF) + return; + + if (CUR == 0) { + const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1]; + int line = ctxt->pushTab[ctxt->nameNr - 1].line; + xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED, + "Premature end of data in tag %s line %d\n", + name, line, NULL); + return; + } + + xmlParseElementEnd(ctxt); +} + +/** + * xmlParseElementStart: + * @ctxt: an XML parser context + * + * Parse the start of an XML element. Returns -1 in case of error, 0 if an + * opening tag was parsed, 1 if an empty element was parsed. 
+ */ +static int +xmlParseElementStart(xmlParserCtxtPtr ctxt) { const xmlChar *name; const xmlChar *prefix = NULL; const xmlChar *URI = NULL; @@ -9916,7 +10037,7 @@ xmlParseElement(xmlParserCtxtPtr ctxt) { "Excessive depth in document: %d use XML_PARSE_HUGE option\n", xmlParserMaxDepth); xmlHaltParser(ctxt); - return; + return(-1); } /* Capture start position */ @@ -9943,12 +10064,12 @@ xmlParseElement(xmlParserCtxtPtr ctxt) { name = xmlParseStartTag(ctxt); #endif /* LIBXML_SAX1_ENABLED */ if (ctxt->instate == XML_PARSER_EOF) - return; + return(-1); if (name == NULL) { spacePop(ctxt); - return; + return(-1); } - namePush(ctxt, name); + nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr); ret = ctxt->node; #ifdef LIBXML_VALID_ENABLED @@ -9989,7 +10110,7 @@ xmlParseElement(xmlParserCtxtPtr ctxt) { node_info.node = ret; xmlParserAddNodeInfo(ctxt, &node_info); } - return; + return(1); } if (RAW == '>') { NEXT1; @@ -10017,41 +10138,36 @@ xmlParseElement(xmlParserCtxtPtr ctxt) { node_info.node = ret; xmlParserAddNodeInfo(ctxt, &node_info); } - return; + return(-1); } - /* - * Parse the content of the element: - */ - xmlParseContent(ctxt); - if (ctxt->instate == XML_PARSER_EOF) - return; - if (!IS_BYTE_CHAR(RAW)) { - xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED, - "Premature end of data in tag %s line %d\n", - name, line, NULL); + return(0); +} - /* - * end of parsing of this node. - */ - nodePop(ctxt); - namePop(ctxt); - spacePop(ctxt); - if (nsNr != ctxt->nsNr) - nsPop(ctxt, ctxt->nsNr - nsNr); - return; - } +/** + * xmlParseElementEnd: + * @ctxt: an XML parser context + * + * Parse the end of an XML element. 
+ */ +static void +xmlParseElementEnd(xmlParserCtxtPtr ctxt) { + xmlParserNodeInfo node_info; + xmlNodePtr ret = ctxt->node; + + if (ctxt->nameNr <= 0) + return; /* * parse the end of tag: 'sax2) { - xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen); + xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]); namePop(ctxt); } #ifdef LIBXML_SAX1_ENABLED - else - xmlParseEndTag1(ctxt, line); + else + xmlParseEndTag1(ctxt, 0); #endif /* LIBXML_SAX1_ENABLED */ /* @@ -11321,6 +11437,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { const xmlChar *name; const xmlChar *prefix = NULL; const xmlChar *URI = NULL; + int line = ctxt->input->line; int nsNr = ctxt->nsNr; if ((avail < 2) && (ctxt->inputNr == 1)) @@ -11418,12 +11535,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { nodePop(ctxt); spacePop(ctxt); } - if (ctxt->sax2) - nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr); -#ifdef LIBXML_SAX1_ENABLED - else - namePush(ctxt, name); -#endif /* LIBXML_SAX1_ENABLED */ + nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr); ctxt->instate = XML_PARSER_CONTENT; ctxt->progressive = 1; @@ -11540,11 +11652,7 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) { } } if (ctxt->sax2) { - xmlParseEndTag2(ctxt, - (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3], - (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0, - (int) (ptrdiff_t) - ctxt->pushTab[ctxt->nameNr * 3 - 1], 0); + xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]); nameNsPop(ctxt); } #ifdef LIBXML_SAX1_ENABLED @@ -12206,12 +12314,12 @@ xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size, } } res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk); + xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur); if (res < 0) { ctxt->errNo = XML_PARSER_EOF; xmlHaltParser(ctxt); return (XML_PARSER_EOF); } - xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur); #ifdef DEBUG_PUSH 
xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size); #endif @@ -12226,6 +12334,7 @@ xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size, size_t current = ctxt->input->cur - ctxt->input->base; nbchars = xmlCharEncInput(in, terminate); + xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current); if (nbchars < 0) { /* TODO 2.6.0 */ xmlGenericError(xmlGenericErrorContext, @@ -12233,7 +12342,6 @@ xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size, xmlHaltParser(ctxt); return(XML_ERR_INVALID_ENCODING); } - xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current); } } } @@ -12368,13 +12476,6 @@ xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data, return(NULL); } ctxt->dictNames = 1; - ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *)); - if (ctxt->pushTab == NULL) { - xmlErrMemory(ctxt, NULL); - xmlFreeParserInputBuffer(buf); - xmlFreeParserCtxt(ctxt); - return(NULL); - } if (sax != NULL) { #ifdef LIBXML_SAX1_ENABLED if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) @@ -13163,7 +13264,7 @@ xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data, /** * xmlParseBalancedChunkMemory: - * @doc: the document the chunk pertains to + * @doc: the document the chunk pertains to (must not be NULL) * @sax: the SAX handler block (possibly NULL) * @user_data: The user data returned on SAX callbacks (possibly NULL) * @depth: Used for loop detection, use 0 @@ -13615,7 +13716,7 @@ xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen, #ifdef LIBXML_SAX1_ENABLED /** * xmlParseBalancedChunkMemoryRecover: - * @doc: the document the chunk pertains to + * @doc: the document the chunk pertains to (must not be NULL) * @sax: the SAX handler block (possibly NULL) * @user_data: The user data returned on SAX callbacks (possibly NULL) * @depth: Used for loop detection, use 0 @@ -13687,6 +13788,7 @@ xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax, } else 
{ xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL); } + /* doc == NULL is only supported for historic reasons */ if (doc != NULL) { newDoc->intSubset = doc->intSubset; newDoc->extSubset = doc->extSubset; @@ -13703,6 +13805,7 @@ xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax, } xmlAddChild((xmlNodePtr) newDoc, newRoot); nodePush(ctxt, newRoot); + /* doc == NULL is only supported for historic reasons */ if (doc == NULL) { ctxt->myDoc = newDoc; } else { @@ -13772,8 +13875,8 @@ xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax, xmlFreeParserCtxt(ctxt); newDoc->intSubset = NULL; newDoc->extSubset = NULL; - if(doc != NULL) - newDoc->oldNs = NULL; + /* This leaks the namespace list if doc == NULL */ + newDoc->oldNs = NULL; xmlFreeDoc(newDoc); return(ret); @@ -14055,7 +14158,7 @@ xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename, if ((ctxt->wellFormed) || recovery) { ret = ctxt->myDoc; - if (ret != NULL) { + if ((ret != NULL) && (ctxt->input->buf != NULL)) { if (ctxt->input->buf->compressed > 0) ret->compression = 9; else @@ -14586,8 +14689,9 @@ xmlInitParser(void) { if (xmlParserInitialized != 0) return; -#if defined(WIN32) && (!defined(LIBXML_STATIC) || defined(LIBXML_STATIC_FOR_DLL)) - atexit(xmlCleanupParser); +#if defined(_WIN32) && (!defined(LIBXML_STATIC) || defined(LIBXML_STATIC_FOR_DLL)) + if (xmlFree == free) + atexit(xmlCleanupParser); #endif #ifdef LIBXML_THREAD_ENABLED @@ -14668,6 +14772,20 @@ xmlCleanupParser(void) { xmlParserInitialized = 0; } +#if defined(HAVE_ATTRIBUTE_DESTRUCTOR) && !defined(LIBXML_STATIC) && \ + !defined(_WIN32) +static void +ATTRIBUTE_DESTRUCTOR +xmlDestructor(void) { + /* + * Calling custom deallocation functions in a destructor can cause + * problems, for example with Nokogiri. 
+ */ + if (xmlFree == free) + xmlCleanupParser(); +} +#endif + /************************************************************************ * * * New set (2.6.0) of simpler and more flexible APIs * @@ -14756,7 +14874,6 @@ xmlCtxtReset(xmlParserCtxtPtr ctxt) ctxt->vctxt.warning = xmlParserValidityWarning; #endif ctxt->record_info = 0; - ctxt->nbChars = 0; ctxt->checkIndex = 0; ctxt->inSubset = 0; ctxt->errNo = XML_ERR_OK; @@ -14822,16 +14939,6 @@ xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk, xmlCtxtReset(ctxt); - if (ctxt->pushTab == NULL) { - ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * - sizeof(xmlChar *)); - if (ctxt->pushTab == NULL) { - xmlErrMemory(ctxt, NULL); - xmlFreeParserInputBuffer(buf); - return(1); - } - } - if (filename == NULL) { ctxt->directory = NULL; } else { diff --git a/third_party/libxml/src/parserInternals.c b/third_party/libxml/src/parserInternals.c index 8fc3d63c3b03..0d2c16a7b196 100644 --- a/third_party/libxml/src/parserInternals.c +++ b/third_party/libxml/src/parserInternals.c @@ -524,8 +524,6 @@ xmlNextChar(xmlParserCtxtPtr ctxt) } else /* 1-byte code */ ctxt->input->cur++; - - ctxt->nbChars++; } else { /* * Assume it's a fixed length encoding (1) with @@ -538,7 +536,6 @@ xmlNextChar(xmlParserCtxtPtr ctxt) } else ctxt->input->col++; ctxt->input->cur++; - ctxt->nbChars++; } if (*ctxt->input->cur == 0) xmlParserInputGrow(ctxt->input, INPUT_CHUNK); @@ -682,7 +679,6 @@ xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) { } if (*ctxt->input->cur == 0xD) { if (ctxt->input->cur[1] == 0xA) { - ctxt->nbChars++; ctxt->input->cur++; } return(0xA); @@ -698,7 +694,6 @@ xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) { *len = 1; if (*ctxt->input->cur == 0xD) { if (ctxt->input->cur[1] == 0xA) { - ctxt->nbChars++; ctxt->input->cur++; } return(0xA); @@ -1163,6 +1158,11 @@ xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input, * Note: this is a bit dangerous, but that's what it * takes to use nearly compatible 
signature for different * encodings. + * + * FIXME: Encoders might buffer partial byte sequences, so + * this probably can't work. We should return an error and + * make sure that callers never try to switch the encoding + * twice. */ xmlCharEncCloseFunc(input->buf->encoder); input->buf->encoder = handler; @@ -1758,7 +1758,6 @@ xmlInitParserCtxt(xmlParserCtxtPtr ctxt) ctxt->options |= XML_PARSE_NOENT; } ctxt->record_info = 0; - ctxt->nbChars = 0; ctxt->checkIndex = 0; ctxt->inSubset = 0; ctxt->errNo = XML_ERR_OK; diff --git a/third_party/libxml/src/runtest.c b/third_party/libxml/src/runtest.c index 321213095960..321d016c0988 100644 --- a/third_party/libxml/src/runtest.c +++ b/third_party/libxml/src/runtest.c @@ -105,6 +105,7 @@ struct testDesc { }; static int update_results = 0; +static char* temp_directory = NULL; static int checkTestFile(const char *filename); #if defined(_WIN32) && !defined(__CYGWIN__) @@ -1699,7 +1700,7 @@ saxParseTest(const char *filename, const char *result, char *temp; nb_tests++; - temp = resultFilename(filename, "", ".res"); + temp = resultFilename(filename, temp_directory, ".res"); if (temp == NULL) { fprintf(stderr, "out of memory\n"); fatalError(); @@ -1818,7 +1819,7 @@ oldParseTest(const char *filename, const char *result, #endif if (doc == NULL) return(1); - temp = resultFilename(filename, "", ".res"); + temp = resultFilename(filename, temp_directory, ".res"); if (temp == NULL) { fprintf(stderr, "out of memory\n"); fatalError(); @@ -2030,7 +2031,7 @@ noentParseTest(const char *filename, const char *result, doc = xmlReadFile(filename, NULL, options); if (doc == NULL) return(1); - temp = resultFilename(filename, "", ".res"); + temp = resultFilename(filename, temp_directory, ".res"); if (temp == NULL) { fprintf(stderr, "Out of memory\n"); fatalError(); @@ -2107,16 +2108,16 @@ errParseTest(const char *filename, const char *result, const char *err, xmlDocDumpMemory(doc, (xmlChar **) &base, &size); } res = compareFileMem(result, base, 
size); - if (res != 0) { - fprintf(stderr, "Result for %s failed in %s\n", filename, result); - return(-1); - } } if (doc != NULL) { if (base != NULL) xmlFree((char *)base); xmlFreeDoc(doc); } + if (res != 0) { + fprintf(stderr, "Result for %s failed in %s\n", filename, result); + return(-1); + } if (err != NULL) { res = compareFileMem(err, testErrors, testErrorsSize); if (res != 0) { @@ -2177,7 +2178,7 @@ streamProcessTest(const char *filename, const char *result, const char *err, nb_tests++; if (result != NULL) { - temp = resultFilename(filename, "", ".res"); + temp = resultFilename(filename, temp_directory, ".res"); if (temp == NULL) { fprintf(stderr, "Out of memory\n"); fatalError(); @@ -2406,7 +2407,7 @@ xpathCommonTest(const char *filename, const char *result, int len, ret = 0; char *temp; - temp = resultFilename(filename, "", ".res"); + temp = resultFilename(filename, temp_directory, ".res"); if (temp == NULL) { fprintf(stderr, "Out of memory\n"); fatalError(); @@ -2605,7 +2606,7 @@ xmlidDocTest(const char *filename, return(-1); } - temp = resultFilename(filename, "", ".res"); + temp = resultFilename(filename, temp_directory, ".res"); if (temp == NULL) { fprintf(stderr, "Out of memory\n"); fatalError(); @@ -2703,7 +2704,7 @@ uriCommonTest(const char *filename, char str[1024]; int res = 0, i, ret; - temp = resultFilename(filename, "", ".res"); + temp = resultFilename(filename, temp_directory, ".res"); if (temp == NULL) { fprintf(stderr, "Out of memory\n"); fatalError(); @@ -3007,7 +3008,7 @@ schemasOneTest(const char *sch, return(-1); } - temp = resultFilename(result, "", ".res"); + temp = resultFilename(result, temp_directory, ".res"); if (temp == NULL) { fprintf(stderr, "Out of memory\n"); fatalError(); @@ -3178,7 +3179,7 @@ rngOneTest(const char *sch, return(-1); } - temp = resultFilename(result, "", ".res"); + temp = resultFilename(result, temp_directory, ".res"); if (temp == NULL) { fprintf(stderr, "Out of memory\n"); fatalError(); @@ -3528,7 +3529,7 @@ 
patternTest(const char *filename, fprintf(stderr, "Failed to open %s\n", filename); return(-1); } - temp = resultFilename(filename, "", ".res"); + temp = resultFilename(filename, temp_directory, ".res"); if (temp == NULL) { fprintf(stderr, "Out of memory\n"); fatalError(); @@ -4565,6 +4566,8 @@ main(int argc ATTRIBUTE_UNUSED, char **argv ATTRIBUTE_UNUSED) { update_results = 1; else if (!strcmp(argv[a], "-quiet")) tests_quiet = 1; + else if (!strcmp(argv[a], "--out")) + temp_directory = argv[++a]; else { for (i = 0; testDescriptions[i].func != NULL; i++) { if (strstr(testDescriptions[i].desc, argv[a])) { diff --git a/third_party/libxml/src/testapi.c b/third_party/libxml/src/testapi.c index 8a4e23e33252..6f19c6fc9676 100644 --- a/third_party/libxml/src/testapi.c +++ b/third_party/libxml/src/testapi.c @@ -2843,7 +2843,7 @@ test_htmlDocContentDumpFormatOutput(void) { int n_buf; xmlDocPtr cur; /* the document */ int n_cur; - char * encoding; /* the encoding string */ + char * encoding; /* the encoding string (unused) */ int n_encoding; int format; /* should formatting spaces been added */ int n_format; @@ -2896,7 +2896,7 @@ test_htmlDocContentDumpOutput(void) { int n_buf; xmlDocPtr cur; /* the document */ int n_cur; - char * encoding; /* the encoding string */ + char * encoding; /* the encoding string (unused) */ int n_encoding; for (n_buf = 0;n_buf < gen_nb_xmlOutputBufferPtr;n_buf++) { @@ -3397,7 +3397,7 @@ test_htmlNodeDumpFormatOutput(void) { int n_doc; xmlNodePtr cur; /* the current node */ int n_cur; - char * encoding; /* the encoding string */ + char * encoding; /* the encoding string (unused) */ int n_encoding; int format; /* should formatting spaces been added */ int n_format; @@ -3457,7 +3457,7 @@ test_htmlNodeDumpOutput(void) { int n_doc; xmlNodePtr cur; /* the current node */ int n_cur; - char * encoding; /* the encoding string */ + char * encoding; /* the encoding string (unused) */ int n_encoding; for (n_buf = 0;n_buf < gen_nb_xmlOutputBufferPtr;n_buf++) { 
@@ -13611,7 +13611,7 @@ test_xmlParseBalancedChunkMemory(void) { #ifdef LIBXML_SAX1_ENABLED int mem_base; int ret_val; - xmlDocPtr doc; /* the document the chunk pertains to */ + xmlDocPtr doc; /* the document the chunk pertains to (must not be NULL) */ int n_doc; xmlSAXHandlerPtr sax; /* the SAX handler block (possibly NULL) */ int n_sax; @@ -13687,7 +13687,7 @@ test_xmlParseBalancedChunkMemoryRecover(void) { #ifdef LIBXML_SAX1_ENABLED int mem_base; int ret_val; - xmlDocPtr doc; /* the document the chunk pertains to */ + xmlDocPtr doc; /* the document the chunk pertains to (must not be NULL) */ int n_doc; xmlSAXHandlerPtr sax; /* the SAX handler block (possibly NULL) */ int n_sax; @@ -29237,6 +29237,33 @@ test_xmlPopInputCallbacks(void) { } +static int +test_xmlPopOutputCallbacks(void) { + int test_ret = 0; + +#if defined(LIBXML_OUTPUT_ENABLED) + int mem_base; + int ret_val; + + mem_base = xmlMemBlocks(); + + ret_val = xmlPopOutputCallbacks(); + desret_int(ret_val); + call_tests++; + xmlResetLastError(); + if (mem_base != xmlMemBlocks()) { + printf("Leak of %d blocks found in xmlPopOutputCallbacks", + xmlMemBlocks() - mem_base); + test_ret++; + printf("\n"); + } + function_tests++; +#endif + + return(test_ret); +} + + static int test_xmlRegisterDefaultInputCallbacks(void) { int test_ret = 0; @@ -29313,7 +29340,7 @@ static int test_xmlIO(void) { int test_ret = 0; - if (quiet == 0) printf("Testing xmlIO : 40 of 50 functions ...\n"); + if (quiet == 0) printf("Testing xmlIO : 41 of 51 functions ...\n"); test_ret += test_xmlAllocOutputBuffer(); test_ret += test_xmlAllocParserInputBuffer(); test_ret += test_xmlCheckFilename(); @@ -29354,6 +29381,7 @@ test_xmlIO(void) { test_ret += test_xmlParserInputBufferPush(); test_ret += test_xmlParserInputBufferRead(); test_ret += test_xmlPopInputCallbacks(); + test_ret += test_xmlPopOutputCallbacks(); test_ret += test_xmlRegisterDefaultInputCallbacks(); test_ret += test_xmlRegisterDefaultOutputCallbacks(); test_ret += 
test_xmlRegisterHTTPPostCallbacks(); @@ -34240,27 +34268,27 @@ test_xmlSaveTree(void) { long ret_val; xmlSaveCtxtPtr ctxt; /* a document saving context */ int n_ctxt; - xmlNodePtr node; /* the top node of the subtree to save */ - int n_node; + xmlNodePtr cur; /* */ + int n_cur; for (n_ctxt = 0;n_ctxt < gen_nb_xmlSaveCtxtPtr;n_ctxt++) { - for (n_node = 0;n_node < gen_nb_xmlNodePtr;n_node++) { + for (n_cur = 0;n_cur < gen_nb_xmlNodePtr;n_cur++) { mem_base = xmlMemBlocks(); ctxt = gen_xmlSaveCtxtPtr(n_ctxt, 0); - node = gen_xmlNodePtr(n_node, 1); + cur = gen_xmlNodePtr(n_cur, 1); - ret_val = xmlSaveTree(ctxt, node); + ret_val = xmlSaveTree(ctxt, cur); desret_long(ret_val); call_tests++; des_xmlSaveCtxtPtr(n_ctxt, ctxt, 0); - des_xmlNodePtr(n_node, node, 1); + des_xmlNodePtr(n_cur, cur, 1); xmlResetLastError(); if (mem_base != xmlMemBlocks()) { printf("Leak of %d blocks found in xmlSaveTree", xmlMemBlocks() - mem_base); test_ret++; printf(" %d", n_ctxt); - printf(" %d", n_node); + printf(" %d", n_cur); printf("\n"); } } diff --git a/third_party/libxml/src/tree.c b/third_party/libxml/src/tree.c index 54bfdc486292..5c22948b74b3 100644 --- a/third_party/libxml/src/tree.c +++ b/third_party/libxml/src/tree.c @@ -1310,6 +1310,16 @@ xmlStringLenGetNodeList(const xmlDoc *doc, const xmlChar *value, int len) { else tmp = 0; while (tmp != ';') { /* Non input consuming loop */ + /* + * If you find an integer overflow here when fuzzing, + * the bug is probably elsewhere. This function should + * only receive entities that were already validated by + * the parser, typically by xmlParseAttValueComplex + * calling xmlStringDecodeEntities. + * + * So it's better *not* to check for overflow to + * potentially discover new bugs. 
+ */ if ((tmp >= '0') && (tmp <= '9')) charval = charval * 16 + (tmp - '0'); else if ((tmp >= 'a') && (tmp <= 'f')) @@ -1338,6 +1348,7 @@ xmlStringLenGetNodeList(const xmlDoc *doc, const xmlChar *value, int len) { else tmp = 0; while (tmp != ';') { /* Non input consuming loops */ + /* Don't check for integer overflow, see above. */ if ((tmp >= '0') && (tmp <= '9')) charval = charval * 10 + (tmp - '0'); else { @@ -1517,6 +1528,7 @@ xmlStringGetNodeList(const xmlDoc *doc, const xmlChar *value) { cur += 3; tmp = *cur; while (tmp != ';') { /* Non input consuming loop */ + /* Don't check for integer overflow, see above. */ if ((tmp >= '0') && (tmp <= '9')) charval = charval * 16 + (tmp - '0'); else if ((tmp >= 'a') && (tmp <= 'f')) @@ -1539,6 +1551,7 @@ xmlStringGetNodeList(const xmlDoc *doc, const xmlChar *value) { cur += 2; tmp = *cur; while (tmp != ';') { /* Non input consuming loops */ + /* Don't check for integer overflow, see above. */ if ((tmp >= '0') && (tmp <= '9')) charval = charval * 10 + (tmp - '0'); else { @@ -1649,6 +1662,10 @@ xmlStringGetNodeList(const xmlDoc *doc, const xmlChar *value) { if (!xmlBufIsEmpty(buf)) { node = xmlNewDocText(doc, NULL); + if (node == NULL) { + xmlBufFree(buf); + return(NULL); + } node->content = xmlBufDetach(buf); if (last == NULL) { @@ -1884,12 +1901,6 @@ xmlNewPropInternal(xmlNodePtr node, xmlNsPtr ns, if (value != NULL) { xmlNodePtr tmp; - if(!xmlCheckUTF8(value)) { - xmlTreeErr(XML_TREE_NOT_UTF8, (xmlNodePtr) doc, - NULL); - if (doc != NULL) - doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1"); - } cur->children = xmlNewDocText(doc, value); cur->last = NULL; tmp = cur->children; @@ -2009,6 +2020,11 @@ xmlNewNsPropEatName(xmlNodePtr node, xmlNsPtr ns, xmlChar *name, * @value: the value of the attribute * * Create a new property carried by a document. 
+ * NOTE: @value is supposed to be a piece of XML CDATA, so it allows entity + * references, but XML special chars need to be escaped first by using + * xmlEncodeEntitiesReentrant(). Use xmlNewProp() if you don't need + * entities support. + * * Returns a pointer to the attribute */ xmlAttrPtr @@ -4554,6 +4570,7 @@ xmlCopyDoc(xmlDocPtr doc, int recursive) { if (doc == NULL) return(NULL); ret = xmlNewDoc(doc->version); if (ret == NULL) return(NULL); + ret->type = doc->type; if (doc->name != NULL) ret->name = xmlMemStrdup(doc->name); if (doc->encoding != NULL) @@ -4876,7 +4893,9 @@ xmlGetNodePath(const xmlNode *node) } next = ((xmlAttrPtr) cur)->parent; } else { - next = cur->parent; + xmlFree(buf); + xmlFree(buffer); + return (NULL); } /* @@ -6571,6 +6590,16 @@ xmlGetPropNodeInternal(const xmlNode *node, const xmlChar *name, attrDecl = xmlGetDtdQAttrDesc(doc->extSubset, elemQName, name, NULL); } + } else if (xmlStrEqual(nsName, XML_XML_NAMESPACE)) { + /* + * The XML namespace must be bound to prefix 'xml'. 
+ */ + attrDecl = xmlGetDtdQAttrDesc(doc->intSubset, + elemQName, name, BAD_CAST "xml"); + if ((attrDecl == NULL) && (doc->extSubset != NULL)) { + attrDecl = xmlGetDtdQAttrDesc(doc->extSubset, + elemQName, name, BAD_CAST "xml"); + } } else { xmlNsPtr *nsList, *cur; @@ -6917,12 +6946,6 @@ xmlSetNsProp(xmlNodePtr node, xmlNsPtr ns, const xmlChar *name, if (value != NULL) { xmlNodePtr tmp; - if(!xmlCheckUTF8(value)) { - xmlTreeErr(XML_TREE_NOT_UTF8, (xmlNodePtr) node->doc, - NULL); - if (node->doc != NULL) - node->doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1"); - } prop->children = xmlNewDocText(node->doc, value); prop->last = NULL; tmp = prop->children; diff --git a/third_party/libxml/src/uri.c b/third_party/libxml/src/uri.c index dfe175eade73..cbc2d00ab6df 100644 --- a/third_party/libxml/src/uri.c +++ b/third_party/libxml/src/uri.c @@ -11,6 +11,7 @@ #define IN_LIBXML #include "libxml.h" +#include #include #include @@ -329,9 +330,14 @@ xmlParse3986Port(xmlURIPtr uri, const char **str) if (ISA_DIGIT(cur)) { while (ISA_DIGIT(cur)) { - port = port * 10 + (*cur - '0'); - if (port > 99999999) - port = 99999999; + int digit = *cur - '0'; + + if (port > INT_MAX / 10) + return(1); + port *= 10; + if (port > INT_MAX - digit) + return(1); + port += digit; cur++; } @@ -1750,11 +1756,6 @@ xmlURIEscape(const xmlChar * str) xmlURIPtr uri; int ret2; -#define NULLCHK(p) if(!p) { \ - xmlURIErrMemory("escaping URI value\n"); \ - xmlFreeURI(uri); \ - return NULL; } \ - if (str == NULL) return (NULL); @@ -1776,6 +1777,12 @@ xmlURIEscape(const xmlChar * str) ret = NULL; +#define NULLCHK(p) if(!p) { \ + xmlURIErrMemory("escaping URI value\n"); \ + xmlFreeURI(uri); \ + xmlFree(ret); \ + return NULL; } \ + if (uri->scheme) { segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-."); NULLCHK(segment) @@ -1796,7 +1803,7 @@ xmlURIEscape(const xmlChar * str) if (uri->user) { segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,"); NULLCHK(segment) - ret = 
xmlStrcat(ret,BAD_CAST "//"); + ret = xmlStrcat(ret,BAD_CAST "//"); ret = xmlStrcat(ret, segment); ret = xmlStrcat(ret, BAD_CAST "@"); xmlFree(segment); @@ -1805,8 +1812,8 @@ xmlURIEscape(const xmlChar * str) if (uri->server) { segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@"); NULLCHK(segment) - if (uri->user == NULL) - ret = xmlStrcat(ret, BAD_CAST "//"); + if (uri->user == NULL) + ret = xmlStrcat(ret, BAD_CAST "//"); ret = xmlStrcat(ret, segment); xmlFree(segment); } diff --git a/third_party/libxml/src/win32/Makefile.msvc b/third_party/libxml/src/win32/Makefile.msvc index f6a0182d55d2..725b0ca1d689 100644 --- a/third_party/libxml/src/win32/Makefile.msvc +++ b/third_party/libxml/src/win32/Makefile.msvc @@ -106,6 +106,11 @@ CFLAGS = $(CFLAGS) /D "NDEBUG" /O2 LDFLAGS = $(LDFLAGS) !endif +# append CFLAGS etc. passed on command line +CPPFLAGS = $(CPPFLAGS) $(EXTRA_CPPFLAGS) +CFLAGS = $(CFLAGS) $(EXTRA_CFLAGS) +LDFLAGS = $(LDFLAGS) $(EXTRA_LDFLAGS) + # Libxml object files. XML_OBJS = $(XML_INTDIR)\buf.obj\ $(XML_INTDIR)\c14n.obj\ diff --git a/third_party/libxml/src/win32/configure.js b/third_party/libxml/src/win32/configure.js index e2ab31da2d36..cec64c539080 100644 --- a/third_party/libxml/src/win32/configure.js +++ b/third_party/libxml/src/win32/configure.js @@ -208,15 +208,15 @@ function discoverVersion() while (cf.AtEndOfStream != true) { ln = cf.ReadLine(); s = new String(ln); - if (s.search(/^LIBXML_MAJOR_VERSION=/) != -1) { - vf.WriteLine(s); - verMajor = s.substring(s.indexOf("=") + 1, s.length); - } else if(s.search(/^LIBXML_MINOR_VERSION=/) != -1) { - vf.WriteLine(s); - verMinor = s.substring(s.indexOf("=") + 1, s.length); - } else if(s.search(/^LIBXML_MICRO_VERSION=/) != -1) { - vf.WriteLine(s); - verMicro = s.substring(s.indexOf("=") + 1, s.length); + if (m = s.match(/^m4_define\(\[MAJOR_VERSION\], (\w+)\)/)) { + vf.WriteLine("LIBXML_MAJOR_VERSION=" + m[1]); + verMajor = m[1]; + } else if(m = s.match(/^m4_define\(\[MINOR_VERSION\], 
(\w+)\)/)) { + vf.WriteLine("LIBXML_MINOR_VERSION=" + m[1]); + verMinor = m[1]; + } else if(m = s.match(/^m4_define\(\[MICRO_VERSION\], (\w+)\)/)) { + vf.WriteLine("LIBXML_MICRO_VERSION=" + m[1]); + verMicro = m[1]; } else if(s.search(/^LIBXML_MICRO_VERSION_SUFFIX=/) != -1) { vf.WriteLine(s); verMicroSuffix = s.substring(s.indexOf("=") + 1, s.length); diff --git a/third_party/libxml/src/xmlIO.c b/third_party/libxml/src/xmlIO.c index 8e1d3b16ca46..08f7907ef125 100644 --- a/third_party/libxml/src/xmlIO.c +++ b/third_party/libxml/src/xmlIO.c @@ -3439,7 +3439,7 @@ xmlOutputBufferWrite(xmlOutputBufferPtr out, int len, const char *buf) { if (out->writecallback) nbchars = xmlBufUse(out->conv); else - nbchars = ret; + nbchars = ret >= 0 ? ret : 0; } else { ret = xmlBufAdd(out->buffer, (const xmlChar *) buf, chunk); if (ret != 0) @@ -3637,7 +3637,7 @@ xmlOutputBufferWriteEscape(xmlOutputBufferPtr out, const xmlChar *str, if (out->writecallback) nbchars = xmlBufUse(out->conv); else - nbchars = ret; + nbchars = ret >= 0 ? ret : 0; } else { ret = escaping(xmlBufEnd(out->buffer), &chunk, str, &cons); if ((ret < 0) || (chunk == 0)) /* chunk==0 => nothing done */ diff --git a/third_party/libxml/src/xmlreader.c b/third_party/libxml/src/xmlreader.c index b603c7208b27..72e40b032941 100644 --- a/third_party/libxml/src/xmlreader.c +++ b/third_party/libxml/src/xmlreader.c @@ -48,6 +48,13 @@ #define MAX_ERR_MSG_SIZE 64000 +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION +/* Keeping free objects can hide memory errors. */ +#define MAX_FREE_NODES 1 +#else +#define MAX_FREE_NODES 100 +#endif + /* * The following VA_COPY was coded following an example in * the Samba project. 
It may not be sufficient for some @@ -278,6 +285,59 @@ xmlTextReaderRemoveID(xmlDocPtr doc, xmlAttrPtr attr) { return(0); } +/** + * xmlTextReaderWalkRemoveRef: + * @data: Contents of current link + * @user: Value supplied by the user + * + * Returns 0 to abort the walk or 1 to continue + */ +static int +xmlTextReaderWalkRemoveRef(const void *data, void *user) +{ + xmlRefPtr ref = (xmlRefPtr)data; + xmlAttrPtr attr = (xmlAttrPtr)user; + + if (ref->attr == attr) { /* Matched: remove and terminate walk */ + ref->name = xmlStrdup(attr->name); + ref->attr = NULL; + return 0; + } + return 1; +} + +/** + * xmlTextReaderRemoveRef: + * @doc: the document + * @attr: the attribute + * + * Remove the given attribute from the Ref table maintained internally. + * + * Returns -1 if the lookup failed and 0 otherwise + */ +static int +xmlTextReaderRemoveRef(xmlDocPtr doc, xmlAttrPtr attr) { + xmlListPtr ref_list; + xmlRefTablePtr table; + xmlChar *ID; + + if (doc == NULL) return(-1); + if (attr == NULL) return(-1); + table = (xmlRefTablePtr) doc->refs; + if (table == NULL) + return(-1); + + ID = xmlNodeListGetString(doc, attr->children, 1); + if (ID == NULL) + return(-1); + ref_list = xmlHashLookup(table, ID); + xmlFree(ID); + if(ref_list == NULL) + return (-1); + xmlListWalk(ref_list, xmlTextReaderWalkRemoveRef, attr); + return(0); +} + /** * xmlTextReaderFreeProp: * @reader: the xmlTextReaderPtr used @@ -299,18 +359,20 @@ xmlTextReaderFreeProp(xmlTextReaderPtr reader, xmlAttrPtr cur) { xmlDeregisterNodeDefaultValue((xmlNodePtr) cur); /* Check for ID removal -> leading to invalid references ! 
*/ - if ((cur->parent != NULL) && (cur->parent->doc != NULL) && - ((cur->parent->doc->intSubset != NULL) || - (cur->parent->doc->extSubset != NULL))) { + if ((cur->parent != NULL) && (cur->parent->doc != NULL)) { if (xmlIsID(cur->parent->doc, cur->parent, cur)) xmlTextReaderRemoveID(cur->parent->doc, cur); + if (((cur->parent->doc->intSubset != NULL) || + (cur->parent->doc->extSubset != NULL)) && + (xmlIsRef(cur->parent->doc, cur->parent, cur))) + xmlTextReaderRemoveRef(cur->parent->doc, cur); } if (cur->children != NULL) xmlTextReaderFreeNodeList(reader, cur->children); DICT_FREE(cur->name); if ((reader != NULL) && (reader->ctxt != NULL) && - (reader->ctxt->freeAttrsNr < 100)) { + (reader->ctxt->freeAttrsNr < MAX_FREE_NODES)) { cur->next = reader->ctxt->freeAttrs; reader->ctxt->freeAttrs = cur; reader->ctxt->freeAttrsNr++; @@ -411,7 +473,7 @@ xmlTextReaderFreeNodeList(xmlTextReaderPtr reader, xmlNodePtr cur) { if (((cur->type == XML_ELEMENT_NODE) || (cur->type == XML_TEXT_NODE)) && (reader != NULL) && (reader->ctxt != NULL) && - (reader->ctxt->freeElemsNr < 100)) { + (reader->ctxt->freeElemsNr < MAX_FREE_NODES)) { cur->next = reader->ctxt->freeElems; reader->ctxt->freeElems = cur; reader->ctxt->freeElemsNr++; @@ -499,7 +561,7 @@ xmlTextReaderFreeNode(xmlTextReaderPtr reader, xmlNodePtr cur) { if (((cur->type == XML_ELEMENT_NODE) || (cur->type == XML_TEXT_NODE)) && (reader != NULL) && (reader->ctxt != NULL) && - (reader->ctxt->freeElemsNr < 100)) { + (reader->ctxt->freeElemsNr < MAX_FREE_NODES)) { cur->next = reader->ctxt->freeElems; reader->ctxt->freeElems = cur; reader->ctxt->freeElemsNr++; @@ -1436,6 +1498,8 @@ xmlTextReaderRead(xmlTextReaderPtr reader) { (reader->node->prev->type != XML_DTD_NODE)) { xmlNodePtr tmp = reader->node->prev; if ((tmp->extra & NODE_IS_PRESERVED) == 0) { + if (oldnode == tmp) + oldnode = NULL; xmlUnlinkNode(tmp); xmlTextReaderFreeNode(reader, tmp); } @@ -1521,7 +1585,8 @@ xmlTextReaderRead(xmlTextReaderPtr reader) { /* * Handle 
XInclude if asked for */ - if ((reader->xinclude) && (reader->node != NULL) && + if ((reader->xinclude) && (reader->in_xinclude == 0) && + (reader->node != NULL) && (reader->node->type == XML_ELEMENT_NODE) && (reader->node->ns != NULL) && ((xmlStrEqual(reader->node->ns->href, XINCLUDE_NS)) || @@ -2260,6 +2325,7 @@ xmlFreeTextReader(xmlTextReaderPtr reader) { if (reader->ctxt != NULL) { if (reader->dict == reader->ctxt->dict) reader->dict = NULL; +#ifdef LIBXML_VALID_ENABLED if ((reader->ctxt->vctxt.vstateTab != NULL) && (reader->ctxt->vctxt.vstateMax > 0)){ #ifdef LIBXML_REGEXP_ENABLED @@ -2270,6 +2336,7 @@ xmlFreeTextReader(xmlTextReaderPtr reader) { reader->ctxt->vctxt.vstateTab = NULL; reader->ctxt->vctxt.vstateMax = 0; } +#endif /* LIBXML_VALID_ENABLED */ if (reader->ctxt->myDoc != NULL) { if (reader->preserve == 0) xmlTextReaderFreeDoc(reader, reader->ctxt->myDoc); diff --git a/third_party/libxml/src/xmlsave.c b/third_party/libxml/src/xmlsave.c index 0e698bbdd1c2..489505f4865a 100644 --- a/third_party/libxml/src/xmlsave.c +++ b/third_party/libxml/src/xmlsave.c @@ -590,7 +590,6 @@ static int xmlSaveClearEncoding(xmlSaveCtxtPtr ctxt) { static void xhtmlNodeDumpOutput(xmlSaveCtxtPtr ctxt, xmlNodePtr cur); #endif -static void xmlNodeListDumpOutput(xmlSaveCtxtPtr ctxt, xmlNodePtr cur); static void xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur); void xmlNsListDumpOutput(xmlOutputBufferPtr buf, xmlNsPtr cur); static int xmlDocContentDumpOutput(xmlSaveCtxtPtr ctxt, xmlDocPtr cur); @@ -705,6 +704,7 @@ xmlNsListDumpOutput(xmlOutputBufferPtr buf, xmlNsPtr cur) { static void xmlDtdDumpOutput(xmlSaveCtxtPtr ctxt, xmlDtdPtr dtd) { xmlOutputBufferPtr buf; + xmlNodePtr cur; int format, level; if (dtd == NULL) return; @@ -742,7 +742,9 @@ xmlDtdDumpOutput(xmlSaveCtxtPtr ctxt, xmlDtdPtr dtd) { level = ctxt->level; ctxt->format = 0; ctxt->level = -1; - xmlNodeListDumpOutput(ctxt, dtd->children); + for (cur = dtd->children; cur != NULL; cur = cur->next) { + 
xmlNodeDumpOutputInternal(ctxt, cur); + } ctxt->format = format; ctxt->level = level; xmlOutputBufferWrite(buf, 2, "]>"); @@ -776,58 +778,9 @@ xmlAttrDumpOutput(xmlSaveCtxtPtr ctxt, xmlAttrPtr cur) { xmlOutputBufferWrite(buf, 1, "\""); } -/** - * xmlAttrListDumpOutput: - * @buf: the XML buffer output - * @doc: the document - * @cur: the first attribute pointer - * @encoding: an optional encoding string - * - * Dump a list of XML attributes - */ -static void -xmlAttrListDumpOutput(xmlSaveCtxtPtr ctxt, xmlAttrPtr cur) { - if (cur == NULL) return; - while (cur != NULL) { - xmlAttrDumpOutput(ctxt, cur); - cur = cur->next; - } -} - - - -/** - * xmlNodeListDumpOutput: - * @cur: the first node - * - * Dump an XML node list, recursive behaviour, children are printed too. - */ -static void -xmlNodeListDumpOutput(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) { - xmlOutputBufferPtr buf; - - if (cur == NULL) return; - buf = ctxt->buf; - while (cur != NULL) { - if ((ctxt->format == 1) && (xmlIndentTreeOutput) && - ((cur->type == XML_ELEMENT_NODE) || - (cur->type == XML_COMMENT_NODE) || - (cur->type == XML_PI_NODE))) - xmlOutputBufferWrite(buf, ctxt->indent_size * - (ctxt->level > ctxt->indent_nr ? - ctxt->indent_nr : ctxt->level), - ctxt->indent); - xmlNodeDumpOutputInternal(ctxt, cur); - if (ctxt->format == 1) { - xmlOutputBufferWrite(buf, 1, "\n"); - } - cur = cur->next; - } -} - #ifdef LIBXML_HTML_ENABLED /** - * xmlNodeDumpOutputInternal: + * htmlNodeDumpOutputInternal: * @cur: the current node * * Dump an HTML node, recursive behaviour, children are printed too. 
@@ -893,57 +846,126 @@ htmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) { */ static void xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) { - int format; - xmlNodePtr tmp; + int format = ctxt->format; + xmlNodePtr tmp, root, unformattedNode = NULL, parent; + xmlAttrPtr attr; xmlChar *start, *end; xmlOutputBufferPtr buf; if (cur == NULL) return; buf = ctxt->buf; - if (cur->type == XML_XINCLUDE_START) - return; - if (cur->type == XML_XINCLUDE_END) - return; - if ((cur->type == XML_DOCUMENT_NODE) || - (cur->type == XML_HTML_DOCUMENT_NODE)) { - xmlDocContentDumpOutput(ctxt, (xmlDocPtr) cur); - return; - } -#ifdef LIBXML_HTML_ENABLED - if (ctxt->options & XML_SAVE_XHTML) { - xhtmlNodeDumpOutput(ctxt, cur); - return; - } - if (((cur->type != XML_NAMESPACE_DECL) && (cur->doc != NULL) && - (cur->doc->type == XML_HTML_DOCUMENT_NODE) && - ((ctxt->options & XML_SAVE_AS_XML) == 0)) || - (ctxt->options & XML_SAVE_AS_HTML)) { - htmlNodeDumpOutputInternal(ctxt, cur); - return; - } -#endif - if (cur->type == XML_DTD_NODE) { - xmlDtdDumpOutput(ctxt, (xmlDtdPtr) cur); - return; - } - if (cur->type == XML_DOCUMENT_FRAG_NODE) { - xmlNodeListDumpOutput(ctxt, cur->children); - return; - } - if (cur->type == XML_ELEMENT_DECL) { - xmlBufDumpElementDecl(buf->buffer, (xmlElementPtr) cur); - return; - } - if (cur->type == XML_ATTRIBUTE_DECL) { - xmlBufDumpAttributeDecl(buf->buffer, (xmlAttributePtr) cur); - return; - } - if (cur->type == XML_ENTITY_DECL) { - xmlBufDumpEntityDecl(buf->buffer, (xmlEntityPtr) cur); - return; - } - if (cur->type == XML_TEXT_NODE) { - if (cur->content != NULL) { + + root = cur; + parent = cur->parent; + while (1) { + switch (cur->type) { + case XML_DOCUMENT_NODE: + case XML_HTML_DOCUMENT_NODE: + xmlDocContentDumpOutput(ctxt, (xmlDocPtr) cur); + break; + + case XML_DTD_NODE: + xmlDtdDumpOutput(ctxt, (xmlDtdPtr) cur); + break; + + case XML_DOCUMENT_FRAG_NODE: + /* Always validate cur->parent when descending. 
*/ + if ((cur->parent == parent) && (cur->children != NULL)) { + parent = cur; + cur = cur->children; + continue; + } + break; + + case XML_ELEMENT_DECL: + xmlBufDumpElementDecl(buf->buffer, (xmlElementPtr) cur); + break; + + case XML_ATTRIBUTE_DECL: + xmlBufDumpAttributeDecl(buf->buffer, (xmlAttributePtr) cur); + break; + + case XML_ENTITY_DECL: + xmlBufDumpEntityDecl(buf->buffer, (xmlEntityPtr) cur); + break; + + case XML_ELEMENT_NODE: + if ((cur != root) && (ctxt->format == 1) && + (xmlIndentTreeOutput)) + xmlOutputBufferWrite(buf, ctxt->indent_size * + (ctxt->level > ctxt->indent_nr ? + ctxt->indent_nr : ctxt->level), + ctxt->indent); + + /* + * Some users like lxml are known to pass nodes with a corrupted + * tree structure. Fall back to a recursive call to handle this + * case. + */ + if ((cur->parent != parent) && (cur->children != NULL)) { + xmlNodeDumpOutputInternal(ctxt, cur); + break; + } + + xmlOutputBufferWrite(buf, 1, "<"); + if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) { + xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix); + xmlOutputBufferWrite(buf, 1, ":"); + } + xmlOutputBufferWriteString(buf, (const char *)cur->name); + if (cur->nsDef) + xmlNsListDumpOutputCtxt(ctxt, cur->nsDef); + for (attr = cur->properties; attr != NULL; attr = attr->next) + xmlAttrDumpOutput(ctxt, attr); + + if (cur->children == NULL) { + if ((ctxt->options & XML_SAVE_NO_EMPTY) == 0) { + if (ctxt->format == 2) + xmlOutputBufferWriteWSNonSig(ctxt, 0); + xmlOutputBufferWrite(buf, 2, "/>"); + } else { + if (ctxt->format == 2) + xmlOutputBufferWriteWSNonSig(ctxt, 1); + xmlOutputBufferWrite(buf, 3, ">ns != NULL) && (cur->ns->prefix != NULL)) { + xmlOutputBufferWriteString(buf, + (const char *)cur->ns->prefix); + xmlOutputBufferWrite(buf, 1, ":"); + } + xmlOutputBufferWriteString(buf, (const char *)cur->name); + if (ctxt->format == 2) + xmlOutputBufferWriteWSNonSig(ctxt, 0); + xmlOutputBufferWrite(buf, 1, ">"); + } + } else { + if (ctxt->format == 1) { + tmp = 
cur->children; + while (tmp != NULL) { + if ((tmp->type == XML_TEXT_NODE) || + (tmp->type == XML_CDATA_SECTION_NODE) || + (tmp->type == XML_ENTITY_REF_NODE)) { + ctxt->format = 0; + unformattedNode = cur; + break; + } + tmp = tmp->next; + } + } + if (ctxt->format == 2) + xmlOutputBufferWriteWSNonSig(ctxt, 1); + xmlOutputBufferWrite(buf, 1, ">"); + if (ctxt->format == 1) xmlOutputBufferWrite(buf, 1, "\n"); + if (ctxt->level >= 0) ctxt->level++; + parent = cur; + cur = cur->children; + continue; + } + + break; + + case XML_TEXT_NODE: + if (cur->content == NULL) + break; if (cur->name != xmlStringTextNoenc) { xmlOutputBufferWriteEscape(buf, cur->content, ctxt->escape); } else { @@ -952,139 +974,136 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) { */ xmlOutputBufferWriteString(buf, (const char *) cur->content); } - } + break; - return; - } - if (cur->type == XML_PI_NODE) { - if (cur->content != NULL) { - xmlOutputBufferWrite(buf, 2, "name); - if (cur->content != NULL) { - if (ctxt->format == 2) - xmlOutputBufferWriteWSNonSig(ctxt, 0); - else - xmlOutputBufferWrite(buf, 1, " "); - xmlOutputBufferWriteString(buf, (const char *)cur->content); - } - xmlOutputBufferWrite(buf, 2, "?>"); - } else { - xmlOutputBufferWrite(buf, 2, "name); - if (ctxt->format == 2) - xmlOutputBufferWriteWSNonSig(ctxt, 0); - xmlOutputBufferWrite(buf, 2, "?>"); - } - return; - } - if (cur->type == XML_COMMENT_NODE) { - if (cur->content != NULL) { - xmlOutputBufferWrite(buf, 4, ""); - } - return; - } - if (cur->type == XML_ENTITY_REF_NODE) { - xmlOutputBufferWrite(buf, 1, "&"); - xmlOutputBufferWriteString(buf, (const char *)cur->name); - xmlOutputBufferWrite(buf, 1, ";"); - return; - } - if (cur->type == XML_CDATA_SECTION_NODE) { - if (cur->content == NULL || *cur->content == '\0') { - xmlOutputBufferWrite(buf, 12, ""); - } else { - start = end = cur->content; - while (*end != '\0') { - if ((*end == ']') && (*(end + 1) == ']') && - (*(end + 2) == '>')) { - end = end + 2; - 
xmlOutputBufferWrite(buf, 9, ""); - start = end; - } - end++; - } - if (start != end) { - xmlOutputBufferWrite(buf, 9, ""); - } - } - return; - } - if (cur->type == XML_ATTRIBUTE_NODE) { - xmlAttrDumpOutput(ctxt, (xmlAttrPtr) cur); - return; - } - if (cur->type == XML_NAMESPACE_DECL) { - xmlNsDumpOutputCtxt(ctxt, (xmlNsPtr) cur); - return; - } + case XML_PI_NODE: + if ((cur != root) && (ctxt->format == 1) && (xmlIndentTreeOutput)) + xmlOutputBufferWrite(buf, ctxt->indent_size * + (ctxt->level > ctxt->indent_nr ? + ctxt->indent_nr : ctxt->level), + ctxt->indent); + + if (cur->content != NULL) { + xmlOutputBufferWrite(buf, 2, "name); + if (cur->content != NULL) { + if (ctxt->format == 2) + xmlOutputBufferWriteWSNonSig(ctxt, 0); + else + xmlOutputBufferWrite(buf, 1, " "); + xmlOutputBufferWriteString(buf, + (const char *)cur->content); + } + xmlOutputBufferWrite(buf, 2, "?>"); + } else { + xmlOutputBufferWrite(buf, 2, "name); + if (ctxt->format == 2) + xmlOutputBufferWriteWSNonSig(ctxt, 0); + xmlOutputBufferWrite(buf, 2, "?>"); + } + break; + + case XML_COMMENT_NODE: + if ((cur != root) && (ctxt->format == 1) && (xmlIndentTreeOutput)) + xmlOutputBufferWrite(buf, ctxt->indent_size * + (ctxt->level > ctxt->indent_nr ? 
+ ctxt->indent_nr : ctxt->level), + ctxt->indent); + + if (cur->content != NULL) { + xmlOutputBufferWrite(buf, 4, ""); + } + break; + + case XML_ENTITY_REF_NODE: + xmlOutputBufferWrite(buf, 1, "&"); + xmlOutputBufferWriteString(buf, (const char *)cur->name); + xmlOutputBufferWrite(buf, 1, ";"); + break; + + case XML_CDATA_SECTION_NODE: + if (cur->content == NULL || *cur->content == '\0') { + xmlOutputBufferWrite(buf, 12, ""); + } else { + start = end = cur->content; + while (*end != '\0') { + if ((*end == ']') && (*(end + 1) == ']') && + (*(end + 2) == '>')) { + end = end + 2; + xmlOutputBufferWrite(buf, 9, ""); + start = end; + } + end++; + } + if (start != end) { + xmlOutputBufferWrite(buf, 9, ""); + } + } + break; - format = ctxt->format; - if (format == 1) { - tmp = cur->children; - while (tmp != NULL) { - if ((tmp->type == XML_TEXT_NODE) || - (tmp->type == XML_CDATA_SECTION_NODE) || - (tmp->type == XML_ENTITY_REF_NODE)) { - ctxt->format = 0; - break; - } - tmp = tmp->next; - } - } - xmlOutputBufferWrite(buf, 1, "<"); - if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) { - xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix); - xmlOutputBufferWrite(buf, 1, ":"); - } + case XML_ATTRIBUTE_NODE: + xmlAttrDumpOutput(ctxt, (xmlAttrPtr) cur); + break; - xmlOutputBufferWriteString(buf, (const char *)cur->name); - if (cur->nsDef) - xmlNsListDumpOutputCtxt(ctxt, cur->nsDef); - if (cur->properties != NULL) - xmlAttrListDumpOutput(ctxt, cur->properties); - - if (((cur->type == XML_ELEMENT_NODE) || (cur->content == NULL)) && - (cur->children == NULL) && ((ctxt->options & XML_SAVE_NO_EMPTY) == 0)) { - if (ctxt->format == 2) - xmlOutputBufferWriteWSNonSig(ctxt, 0); - xmlOutputBufferWrite(buf, 2, "/>"); - ctxt->format = format; - return; - } - if (ctxt->format == 2) - xmlOutputBufferWriteWSNonSig(ctxt, 1); - xmlOutputBufferWrite(buf, 1, ">"); - if ((cur->type != XML_ELEMENT_NODE) && (cur->content != NULL)) { - xmlOutputBufferWriteEscape(buf, cur->content, 
ctxt->escape); - } - if (cur->children != NULL) { - if (ctxt->format == 1) xmlOutputBufferWrite(buf, 1, "\n"); - if (ctxt->level >= 0) ctxt->level++; - xmlNodeListDumpOutput(ctxt, cur->children); - if (ctxt->level > 0) ctxt->level--; - if ((xmlIndentTreeOutput) && (ctxt->format == 1)) - xmlOutputBufferWrite(buf, ctxt->indent_size * - (ctxt->level > ctxt->indent_nr ? - ctxt->indent_nr : ctxt->level), - ctxt->indent); - } - xmlOutputBufferWrite(buf, 2, "ns != NULL) && (cur->ns->prefix != NULL)) { - xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix); - xmlOutputBufferWrite(buf, 1, ":"); - } + case XML_NAMESPACE_DECL: + xmlNsDumpOutputCtxt(ctxt, (xmlNsPtr) cur); + break; - xmlOutputBufferWriteString(buf, (const char *)cur->name); - if (ctxt->format == 2) - xmlOutputBufferWriteWSNonSig(ctxt, 0); - xmlOutputBufferWrite(buf, 1, ">"); - ctxt->format = format; + default: + break; + } + + while (1) { + if (cur == root) + return; + if ((ctxt->format == 1) && + (cur->type != XML_XINCLUDE_START) && + (cur->type != XML_XINCLUDE_END)) + xmlOutputBufferWrite(buf, 1, "\n"); + if (cur->next != NULL) { + cur = cur->next; + break; + } + + cur = parent; + /* cur->parent was validated when descending. */ + parent = cur->parent; + + if (cur->type == XML_ELEMENT_NODE) { + if (ctxt->level > 0) ctxt->level--; + if ((xmlIndentTreeOutput) && (ctxt->format == 1)) + xmlOutputBufferWrite(buf, ctxt->indent_size * + (ctxt->level > ctxt->indent_nr ? 
+ ctxt->indent_nr : ctxt->level), + ctxt->indent); + + xmlOutputBufferWrite(buf, 2, "ns != NULL) && (cur->ns->prefix != NULL)) { + xmlOutputBufferWriteString(buf, + (const char *)cur->ns->prefix); + xmlOutputBufferWrite(buf, 1, ":"); + } + + xmlOutputBufferWriteString(buf, (const char *)cur->name); + if (ctxt->format == 2) + xmlOutputBufferWriteWSNonSig(ctxt, 0); + xmlOutputBufferWrite(buf, 1, ">"); + + if (cur == unformattedNode) { + ctxt->format = format; + unformattedNode = NULL; + } + } + } + } } /** @@ -1224,7 +1243,9 @@ xmlDocContentDumpOutput(xmlSaveCtxtPtr ctxt, xmlDocPtr cur) { else #endif xmlNodeDumpOutputInternal(ctxt, child); - xmlOutputBufferWrite(buf, 1, "\n"); + if ((child->type != XML_XINCLUDE_START) && + (child->type != XML_XINCLUDE_END)) + xmlOutputBufferWrite(buf, 1, "\n"); child = child->next; } } @@ -1395,40 +1416,6 @@ xhtmlAttrListDumpOutput(xmlSaveCtxtPtr ctxt, xmlAttrPtr cur) { } } -/** - * xhtmlNodeListDumpOutput: - * @buf: the XML buffer output - * @doc: the XHTML document - * @cur: the first node - * @level: the imbrication level for indenting - * @format: is formatting allowed - * @encoding: an optional encoding string - * - * Dump an XML node list, recursive behaviour, children are printed too. - * Note that @format = 1 provide node indenting only if xmlIndentTreeOutput = 1 - * or xmlKeepBlanksDefault(0) was called - */ -static void -xhtmlNodeListDumpOutput(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) { - xmlOutputBufferPtr buf; - - if (cur == NULL) return; - buf = ctxt->buf; - while (cur != NULL) { - if ((ctxt->format == 1) && (xmlIndentTreeOutput) && - (cur->type == XML_ELEMENT_NODE)) - xmlOutputBufferWrite(buf, ctxt->indent_size * - (ctxt->level > ctxt->indent_nr ? 
- ctxt->indent_nr : ctxt->level), - ctxt->indent); - xhtmlNodeDumpOutput(ctxt, cur); - if (ctxt->format == 1) { - xmlOutputBufferWrite(buf, 1, "\n"); - } - cur = cur->next; - } -} - /** * xhtmlNodeDumpOutput: * @buf: the XML buffer output @@ -1442,48 +1429,195 @@ xhtmlNodeListDumpOutput(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) { */ static void xhtmlNodeDumpOutput(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) { - int format, addmeta = 0; - xmlNodePtr tmp; + int format = ctxt->format, addmeta; + xmlNodePtr tmp, root, unformattedNode = NULL; xmlChar *start, *end; - xmlOutputBufferPtr buf; + xmlOutputBufferPtr buf = ctxt->buf; if (cur == NULL) return; - if ((cur->type == XML_DOCUMENT_NODE) || - (cur->type == XML_HTML_DOCUMENT_NODE)) { - xmlDocContentDumpOutput(ctxt, (xmlDocPtr) cur); - return; - } - if (cur->type == XML_XINCLUDE_START) - return; - if (cur->type == XML_XINCLUDE_END) - return; - if (cur->type == XML_NAMESPACE_DECL) { - xmlNsDumpOutputCtxt(ctxt, (xmlNsPtr) cur); - return; - } - if (cur->type == XML_DTD_NODE) { - xmlDtdDumpOutput(ctxt, (xmlDtdPtr) cur); - return; - } - if (cur->type == XML_DOCUMENT_FRAG_NODE) { - xhtmlNodeListDumpOutput(ctxt, cur->children); - return; - } - buf = ctxt->buf; - if (cur->type == XML_ELEMENT_DECL) { - xmlBufDumpElementDecl(buf->buffer, (xmlElementPtr) cur); - return; - } - if (cur->type == XML_ATTRIBUTE_DECL) { - xmlBufDumpAttributeDecl(buf->buffer, (xmlAttributePtr) cur); - return; - } - if (cur->type == XML_ENTITY_DECL) { - xmlBufDumpEntityDecl(buf->buffer, (xmlEntityPtr) cur); - return; - } - if (cur->type == XML_TEXT_NODE) { - if (cur->content != NULL) { + + root = cur; + while (1) { + switch (cur->type) { + case XML_DOCUMENT_NODE: + case XML_HTML_DOCUMENT_NODE: + xmlDocContentDumpOutput(ctxt, (xmlDocPtr) cur); + break; + + case XML_NAMESPACE_DECL: + xmlNsDumpOutputCtxt(ctxt, (xmlNsPtr) cur); + break; + + case XML_DTD_NODE: + xmlDtdDumpOutput(ctxt, (xmlDtdPtr) cur); + break; + + case XML_DOCUMENT_FRAG_NODE: + if (cur->children) { + cur 
= cur->children; + continue; + } + break; + + case XML_ELEMENT_DECL: + xmlBufDumpElementDecl(buf->buffer, (xmlElementPtr) cur); + break; + + case XML_ATTRIBUTE_DECL: + xmlBufDumpAttributeDecl(buf->buffer, (xmlAttributePtr) cur); + break; + + case XML_ENTITY_DECL: + xmlBufDumpEntityDecl(buf->buffer, (xmlEntityPtr) cur); + break; + + case XML_ELEMENT_NODE: + addmeta = 0; + + if ((cur != root) && (ctxt->format == 1) && (xmlIndentTreeOutput)) + xmlOutputBufferWrite(buf, ctxt->indent_size * + (ctxt->level > ctxt->indent_nr ? + ctxt->indent_nr : ctxt->level), + ctxt->indent); + + xmlOutputBufferWrite(buf, 1, "<"); + if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) { + xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix); + xmlOutputBufferWrite(buf, 1, ":"); + } + + xmlOutputBufferWriteString(buf, (const char *)cur->name); + if (cur->nsDef) + xmlNsListDumpOutputCtxt(ctxt, cur->nsDef); + if ((xmlStrEqual(cur->name, BAD_CAST "html") && + (cur->ns == NULL) && (cur->nsDef == NULL))) { + /* + * 3.1.1. Strictly Conforming Documents A.3.1.1 3/ + */ + xmlOutputBufferWriteString(buf, + " xmlns=\"http://www.w3.org/1999/xhtml\""); + } + if (cur->properties != NULL) + xhtmlAttrListDumpOutput(ctxt, cur->properties); + + if ((cur->parent != NULL) && + (cur->parent->parent == (xmlNodePtr) cur->doc) && + xmlStrEqual(cur->name, BAD_CAST"head") && + xmlStrEqual(cur->parent->name, BAD_CAST"html")) { + + tmp = cur->children; + while (tmp != NULL) { + if (xmlStrEqual(tmp->name, BAD_CAST"meta")) { + xmlChar *httpequiv; + + httpequiv = xmlGetProp(tmp, BAD_CAST"http-equiv"); + if (httpequiv != NULL) { + if (xmlStrcasecmp(httpequiv, + BAD_CAST"Content-Type") == 0) { + xmlFree(httpequiv); + break; + } + xmlFree(httpequiv); + } + } + tmp = tmp->next; + } + if (tmp == NULL) + addmeta = 1; + } + + if (cur->children == NULL) { + if (((cur->ns == NULL) || (cur->ns->prefix == NULL)) && + ((xhtmlIsEmpty(cur) == 1) && (addmeta == 0))) { + /* + * C.2. 
Empty Elements + */ + xmlOutputBufferWrite(buf, 3, " />"); + } else { + if (addmeta == 1) { + xmlOutputBufferWrite(buf, 1, ">"); + if (ctxt->format == 1) { + xmlOutputBufferWrite(buf, 1, "\n"); + if (xmlIndentTreeOutput) + xmlOutputBufferWrite(buf, ctxt->indent_size * + (ctxt->level + 1 > ctxt->indent_nr ? + ctxt->indent_nr : ctxt->level + 1), + ctxt->indent); + } + xmlOutputBufferWriteString(buf, + ""); + if (ctxt->format == 1) + xmlOutputBufferWrite(buf, 1, "\n"); + } else { + xmlOutputBufferWrite(buf, 1, ">"); + } + /* + * C.3. Element Minimization and Empty Element Content + */ + xmlOutputBufferWrite(buf, 2, "ns != NULL) && (cur->ns->prefix != NULL)) { + xmlOutputBufferWriteString(buf, + (const char *)cur->ns->prefix); + xmlOutputBufferWrite(buf, 1, ":"); + } + xmlOutputBufferWriteString(buf, (const char *)cur->name); + xmlOutputBufferWrite(buf, 1, ">"); + } + } else { + xmlOutputBufferWrite(buf, 1, ">"); + if (addmeta == 1) { + if (ctxt->format == 1) { + xmlOutputBufferWrite(buf, 1, "\n"); + if (xmlIndentTreeOutput) + xmlOutputBufferWrite(buf, ctxt->indent_size * + (ctxt->level + 1 > ctxt->indent_nr ? 
+ ctxt->indent_nr : ctxt->level + 1), + ctxt->indent); + } + xmlOutputBufferWriteString(buf, + ""); + } + + if (ctxt->format == 1) { + tmp = cur->children; + while (tmp != NULL) { + if ((tmp->type == XML_TEXT_NODE) || + (tmp->type == XML_ENTITY_REF_NODE)) { + unformattedNode = cur; + ctxt->format = 0; + break; + } + tmp = tmp->next; + } + } + + if (ctxt->format == 1) xmlOutputBufferWrite(buf, 1, "\n"); + if (ctxt->level >= 0) ctxt->level++; + cur = cur->children; + continue; + } + + break; + + case XML_TEXT_NODE: + if (cur->content == NULL) + break; if ((cur->name == xmlStringText) || (cur->name != xmlStringTextNoenc)) { xmlOutputBufferWriteEscape(buf, cur->content, ctxt->escape); @@ -1493,286 +1627,115 @@ xhtmlNodeDumpOutput(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) { */ xmlOutputBufferWriteString(buf, (const char *) cur->content); } - } - - return; - } - if (cur->type == XML_PI_NODE) { - if (cur->content != NULL) { - xmlOutputBufferWrite(buf, 2, "name); - if (cur->content != NULL) { - xmlOutputBufferWrite(buf, 1, " "); - xmlOutputBufferWriteString(buf, (const char *)cur->content); - } - xmlOutputBufferWrite(buf, 2, "?>"); - } else { - xmlOutputBufferWrite(buf, 2, "name); - xmlOutputBufferWrite(buf, 2, "?>"); - } - return; - } - if (cur->type == XML_COMMENT_NODE) { - if (cur->content != NULL) { - xmlOutputBufferWrite(buf, 4, ""); - } - return; - } - if (cur->type == XML_ENTITY_REF_NODE) { - xmlOutputBufferWrite(buf, 1, "&"); - xmlOutputBufferWriteString(buf, (const char *)cur->name); - xmlOutputBufferWrite(buf, 1, ";"); - return; - } - if (cur->type == XML_CDATA_SECTION_NODE) { - if (cur->content == NULL || *cur->content == '\0') { - xmlOutputBufferWrite(buf, 12, ""); - } else { - start = end = cur->content; - while (*end != '\0') { - if (*end == ']' && *(end + 1) == ']' && *(end + 2) == '>') { - end = end + 2; - xmlOutputBufferWrite(buf, 9, ""); - start = end; - } - end++; - } - if (start != end) { - xmlOutputBufferWrite(buf, 9, ""); - } - } - return; - } - if 
(cur->type == XML_ATTRIBUTE_NODE) { - xmlAttrDumpOutput(ctxt, (xmlAttrPtr) cur); - return; - } + break; - format = ctxt->format; - if (format == 1) { - tmp = cur->children; - while (tmp != NULL) { - if ((tmp->type == XML_TEXT_NODE) || - (tmp->type == XML_ENTITY_REF_NODE)) { - format = 0; - break; - } - tmp = tmp->next; - } - } - xmlOutputBufferWrite(buf, 1, "<"); - if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) { - xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix); - xmlOutputBufferWrite(buf, 1, ":"); - } + case XML_PI_NODE: + if (cur->content != NULL) { + xmlOutputBufferWrite(buf, 2, "name); + if (cur->content != NULL) { + xmlOutputBufferWrite(buf, 1, " "); + xmlOutputBufferWriteString(buf, + (const char *)cur->content); + } + xmlOutputBufferWrite(buf, 2, "?>"); + } else { + xmlOutputBufferWrite(buf, 2, "name); + xmlOutputBufferWrite(buf, 2, "?>"); + } + break; - xmlOutputBufferWriteString(buf, (const char *)cur->name); - if (cur->nsDef) - xmlNsListDumpOutputCtxt(ctxt, cur->nsDef); - if ((xmlStrEqual(cur->name, BAD_CAST "html") && - (cur->ns == NULL) && (cur->nsDef == NULL))) { - /* - * 3.1.1. 
Strictly Conforming Documents A.3.1.1 3/ - */ - xmlOutputBufferWriteString(buf, - " xmlns=\"http://www.w3.org/1999/xhtml\""); - } - if (cur->properties != NULL) - xhtmlAttrListDumpOutput(ctxt, cur->properties); - - if ((cur->type == XML_ELEMENT_NODE) && - (cur->parent != NULL) && - (cur->parent->parent == (xmlNodePtr) cur->doc) && - xmlStrEqual(cur->name, BAD_CAST"head") && - xmlStrEqual(cur->parent->name, BAD_CAST"html")) { - - tmp = cur->children; - while (tmp != NULL) { - if (xmlStrEqual(tmp->name, BAD_CAST"meta")) { - xmlChar *httpequiv; - - httpequiv = xmlGetProp(tmp, BAD_CAST"http-equiv"); - if (httpequiv != NULL) { - if (xmlStrcasecmp(httpequiv, BAD_CAST"Content-Type") == 0) { - xmlFree(httpequiv); - break; + case XML_COMMENT_NODE: + if (cur->content != NULL) { + xmlOutputBufferWrite(buf, 4, ""); + } + break; + + case XML_ENTITY_REF_NODE: + xmlOutputBufferWrite(buf, 1, "&"); + xmlOutputBufferWriteString(buf, (const char *)cur->name); + xmlOutputBufferWrite(buf, 1, ";"); + break; + + case XML_CDATA_SECTION_NODE: + if (cur->content == NULL || *cur->content == '\0') { + xmlOutputBufferWrite(buf, 12, ""); + } else { + start = end = cur->content; + while (*end != '\0') { + if (*end == ']' && *(end + 1) == ']' && + *(end + 2) == '>') { + end = end + 2; + xmlOutputBufferWrite(buf, 9, ""); + start = end; } - xmlFree(httpequiv); + end++; + } + if (start != end) { + xmlOutputBufferWrite(buf, 9, ""); } } - tmp = tmp->next; + break; + + case XML_ATTRIBUTE_NODE: + xmlAttrDumpOutput(ctxt, (xmlAttrPtr) cur); + break; + + default: + break; } - if (tmp == NULL) - addmeta = 1; - } - if ((cur->type == XML_ELEMENT_NODE) && (cur->children == NULL)) { - if (((cur->ns == NULL) || (cur->ns->prefix == NULL)) && - ((xhtmlIsEmpty(cur) == 1) && (addmeta == 0))) { - /* - * C.2. 
Empty Elements - */ - xmlOutputBufferWrite(buf, 3, " />"); - } else { - if (addmeta == 1) { - xmlOutputBufferWrite(buf, 1, ">"); - if (ctxt->format == 1) { - xmlOutputBufferWrite(buf, 1, "\n"); - if (xmlIndentTreeOutput) - xmlOutputBufferWrite(buf, ctxt->indent_size * - (ctxt->level + 1 > ctxt->indent_nr ? - ctxt->indent_nr : ctxt->level + 1), ctxt->indent); - } - xmlOutputBufferWriteString(buf, - ""); - if (ctxt->format == 1) - xmlOutputBufferWrite(buf, 1, "\n"); - } else { - xmlOutputBufferWrite(buf, 1, ">"); - } - /* - * C.3. Element Minimization and Empty Element Content - */ - xmlOutputBufferWrite(buf, 2, "ns != NULL) && (cur->ns->prefix != NULL)) { - xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix); - xmlOutputBufferWrite(buf, 1, ":"); - } - xmlOutputBufferWriteString(buf, (const char *)cur->name); - xmlOutputBufferWrite(buf, 1, ">"); - } - return; - } - xmlOutputBufferWrite(buf, 1, ">"); - if (addmeta == 1) { - if (ctxt->format == 1) { - xmlOutputBufferWrite(buf, 1, "\n"); - if (xmlIndentTreeOutput) - xmlOutputBufferWrite(buf, ctxt->indent_size * - (ctxt->level + 1 > ctxt->indent_nr ? - ctxt->indent_nr : ctxt->level + 1), ctxt->indent); - } - xmlOutputBufferWriteString(buf, - ""); - } - if ((cur->type != XML_ELEMENT_NODE) && (cur->content != NULL)) { - xmlOutputBufferWriteEscape(buf, cur->content, ctxt->escape); - } + while (1) { + if (cur == root) + return; + if (ctxt->format == 1) + xmlOutputBufferWrite(buf, 1, "\n"); + if (cur->next != NULL) { + cur = cur->next; + break; + } -#if 0 - /* - * This was removed due to problems with HTML processors. - * See bug #345147. - */ - /* - * 4.8. 
Script and Style elements - */ - if ((cur->type == XML_ELEMENT_NODE) && - ((xmlStrEqual(cur->name, BAD_CAST "script")) || - (xmlStrEqual(cur->name, BAD_CAST "style"))) && - ((cur->ns == NULL) || - (xmlStrEqual(cur->ns->href, XHTML_NS_NAME)))) { - xmlNodePtr child = cur->children; - - while (child != NULL) { - if (child->type == XML_TEXT_NODE) { - if ((xmlStrchr(child->content, '<') == NULL) && - (xmlStrchr(child->content, '&') == NULL) && - (xmlStrstr(child->content, BAD_CAST "]]>") == NULL)) { - /* Nothing to escape, so just output as is... */ - /* FIXME: Should we do something about "--" also? */ - int level = ctxt->level; - int indent = ctxt->format; - - ctxt->level = 0; - ctxt->format = 0; - xmlOutputBufferWriteString(buf, (const char *) child->content); - /* (We cannot use xhtmlNodeDumpOutput() here because - * we wish to leave '>' unescaped!) */ - ctxt->level = level; - ctxt->format = indent; - } else { - /* We must use a CDATA section. Unfortunately, - * this will break CSS and JavaScript when read by - * a browser in HTML4-compliant mode. :-( */ - start = end = child->content; - while (*end != '\0') { - if (*end == ']' && - *(end + 1) == ']' && - *(end + 2) == '>') { - end = end + 2; - xmlOutputBufferWrite(buf, 9, ""); - start = end; - } - end++; - } - if (start != end) { - xmlOutputBufferWrite(buf, 9, ""); - } - } - } else { - int level = ctxt->level; - int indent = ctxt->format; + /* + * The parent should never be NULL here but we want to handle + * corrupted documents gracefully. + */ + if (cur->parent == NULL) + return; + cur = cur->parent; + + if (cur->type == XML_ELEMENT_NODE) { + if (ctxt->level > 0) ctxt->level--; + if ((xmlIndentTreeOutput) && (ctxt->format == 1)) + xmlOutputBufferWrite(buf, ctxt->indent_size * + (ctxt->level > ctxt->indent_nr ? 
+ ctxt->indent_nr : ctxt->level), + ctxt->indent); + + xmlOutputBufferWrite(buf, 2, "ns != NULL) && (cur->ns->prefix != NULL)) { + xmlOutputBufferWriteString(buf, + (const char *)cur->ns->prefix); + xmlOutputBufferWrite(buf, 1, ":"); + } - ctxt->level = 0; - ctxt->format = 0; - xhtmlNodeDumpOutput(ctxt, child); - ctxt->level = level; - ctxt->format = indent; - } - child = child->next; - } - } -#endif + xmlOutputBufferWriteString(buf, (const char *)cur->name); + xmlOutputBufferWrite(buf, 1, ">"); - if (cur->children != NULL) { - int indent = ctxt->format; - - if (format == 1) xmlOutputBufferWrite(buf, 1, "\n"); - if (ctxt->level >= 0) ctxt->level++; - ctxt->format = format; - xhtmlNodeListDumpOutput(ctxt, cur->children); - if (ctxt->level > 0) ctxt->level--; - ctxt->format = indent; - if ((xmlIndentTreeOutput) && (format == 1)) - xmlOutputBufferWrite(buf, ctxt->indent_size * - (ctxt->level > ctxt->indent_nr ? - ctxt->indent_nr : ctxt->level), - ctxt->indent); - } - xmlOutputBufferWrite(buf, 2, "ns != NULL) && (cur->ns->prefix != NULL)) { - xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix); - xmlOutputBufferWrite(buf, 1, ":"); + if (cur == unformattedNode) { + ctxt->format = format; + unformattedNode = NULL; + } + } + } } - - xmlOutputBufferWriteString(buf, (const char *)cur->name); - xmlOutputBufferWrite(buf, 1, ">"); } #endif @@ -1932,12 +1895,25 @@ xmlSaveDoc(xmlSaveCtxtPtr ctxt, xmlDocPtr doc) * Returns the number of byte written or -1 in case of error */ long -xmlSaveTree(xmlSaveCtxtPtr ctxt, xmlNodePtr node) +xmlSaveTree(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) { long ret = 0; - if ((ctxt == NULL) || (node == NULL)) return(-1); - xmlNodeDumpOutputInternal(ctxt, node); + if ((ctxt == NULL) || (cur == NULL)) return(-1); +#ifdef LIBXML_HTML_ENABLED + if (ctxt->options & XML_SAVE_XHTML) { + xhtmlNodeDumpOutput(ctxt, cur); + return(ret); + } + if (((cur->type != XML_NAMESPACE_DECL) && (cur->doc != NULL) && + (cur->doc->type == XML_HTML_DOCUMENT_NODE) && + 
((ctxt->options & XML_SAVE_AS_XML) == 0)) || + (ctxt->options & XML_SAVE_AS_HTML)) { + htmlNodeDumpOutputInternal(ctxt, cur); + return(ret); + } +#endif + xmlNodeDumpOutputInternal(ctxt, cur); return(ret); } diff --git a/third_party/libxml/src/xmlstring.c b/third_party/libxml/src/xmlstring.c index 780c6435a0fe..62d3053b4824 100644 --- a/third_party/libxml/src/xmlstring.c +++ b/third_party/libxml/src/xmlstring.c @@ -130,16 +130,18 @@ xmlCharStrdup(const char *cur) { int xmlStrcmp(const xmlChar *str1, const xmlChar *str2) { - register int tmp; - if (str1 == str2) return(0); if (str1 == NULL) return(-1); if (str2 == NULL) return(1); +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + return(strcmp((const char *)str1, (const char *)str2)); +#else do { - tmp = *str1++ - *str2; + int tmp = *str1++ - *str2; if (tmp != 0) return(tmp); } while (*str2++ != 0); return 0; +#endif } /** @@ -158,10 +160,14 @@ xmlStrEqual(const xmlChar *str1, const xmlChar *str2) { if (str1 == str2) return(1); if (str1 == NULL) return(0); if (str2 == NULL) return(0); +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + return(strcmp((const char *)str1, (const char *)str2) == 0); +#else do { if (*str1++ != *str2) return(0); } while (*str2++); return(1); +#endif } /** @@ -204,18 +210,15 @@ xmlStrQEqual(const xmlChar *pref, const xmlChar *name, const xmlChar *str) { int xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) { - register int tmp; - if (len <= 0) return(0); if (str1 == str2) return(0); if (str1 == NULL) return(-1); if (str2 == NULL) return(1); -#ifdef __GNUC__ - tmp = strncmp((const char *)str1, (const char *)str2, len); - return tmp; +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + return(strncmp((const char *)str1, (const char *)str2, len)); #else do { - tmp = *str1++ - *str2; + int tmp = *str1++ - *str2; if (tmp != 0 || --len == 0) return(tmp); } while (*str2++ != 0); return 0; diff --git a/third_party/libxml/src/xpath.c b/third_party/libxml/src/xpath.c index 
9ec533dbbbf3..ddbc35d234f0 100644 --- a/third_party/libxml/src/xpath.c +++ b/third_party/libxml/src/xpath.c @@ -137,6 +137,17 @@ */ #define XPATH_MAX_NODESET_LENGTH 10000000 +/* + * XPATH_MAX_RECURSION_DEPTH: + * Maximum number of nested function calls when parsing or evaluating + * expressions + */ +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION +#define XPATH_MAX_RECURSION_DEPTH 500 +#else +#define XPATH_MAX_RECURSION_DEPTH 5000 +#endif + /* * TODO: * There are a few spots where some tests are done which depend upon ascii @@ -479,14 +490,6 @@ int wrap_cmp( xmlNodePtr x, xmlNodePtr y ); * * ************************************************************************/ -#ifndef INFINITY -#define INFINITY (DBL_MAX * DBL_MAX) -#endif - -#ifndef NAN -#define NAN (INFINITY / INFINITY) -#endif - double xmlXPathNAN; double xmlXPathPINF; double xmlXPathNINF; @@ -496,11 +499,14 @@ double xmlXPathNINF; * * Initialize the XPath environment */ +ATTRIBUTE_NO_SANITIZE("float-divide-by-zero") void xmlXPathInit(void) { - xmlXPathNAN = NAN; - xmlXPathPINF = INFINITY; - xmlXPathNINF = -INFINITY; + /* MSVC doesn't allow division by zero in constant expressions. */ + double zero = 0.0; + xmlXPathNAN = 0.0 / zero; + xmlXPathPINF = 1.0 / zero; + xmlXPathNINF = -xmlXPathPINF; } /** @@ -529,9 +535,9 @@ xmlXPathIsInf(double val) { #ifdef isinf return isinf(val) ? (val > 0 ? 
1 : -1) : 0; #else - if (val >= INFINITY) + if (val >= xmlXPathPINF) return 1; - if (val <= -INFINITY) + if (val <= -xmlXPathPINF) return -1; return 0; #endif @@ -1748,7 +1754,6 @@ static int xmlXPathDebugObjMaxUsers = 0; static int xmlXPathDebugObjMaxXSLTTree = 0; static int xmlXPathDebugObjMaxAll = 0; -/* REVISIT TODO: Make this static when committing */ static void xmlXPathDebugObjUsageReset(xmlXPathContextPtr ctxt) { @@ -2063,7 +2068,6 @@ xmlXPathDebugObjUsageReleased(xmlXPathContextPtr ctxt, xmlXPathDebugObjCounterAll--; } -/* REVISIT TODO: Make this static when committing */ static void xmlXPathDebugObjUsageDisplay(xmlXPathContextPtr ctxt) { @@ -5866,10 +5870,10 @@ xmlXPathCastNodeToNumber (xmlNodePtr node) { double ret; if (node == NULL) - return(NAN); + return(xmlXPathNAN); strval = xmlXPathCastNodeToString(node); if (strval == NULL) - return(NAN); + return(xmlXPathNAN); ret = xmlXPathCastStringToNumber(strval); xmlFree(strval); @@ -5890,7 +5894,7 @@ xmlXPathCastNodeSetToNumber (xmlNodeSetPtr ns) { double ret; if (ns == NULL) - return(NAN); + return(xmlXPathNAN); str = xmlXPathCastNodeSetToString(ns); ret = xmlXPathCastStringToNumber(str); xmlFree(str); @@ -5910,13 +5914,13 @@ xmlXPathCastToNumber(xmlXPathObjectPtr val) { double ret = 0.0; if (val == NULL) - return(NAN); + return(xmlXPathNAN); switch (val->type) { case XPATH_UNDEFINED: #ifdef DEBUG_EXPR xmlGenericError(xmlGenericErrorContext, "NUMBER: undefined\n"); #endif - ret = NAN; + ret = xmlXPathNAN; break; case XPATH_NODESET: case XPATH_XSLT_TREE: @@ -5936,7 +5940,7 @@ xmlXPathCastToNumber(xmlXPathObjectPtr val) { case XPATH_RANGE: case XPATH_LOCATIONSET: TODO; - ret = NAN; + ret = xmlXPathNAN; break; } return(ret); @@ -6122,9 +6126,6 @@ xmlXPathNewContext(xmlDocPtr doc) { ret->contextSize = -1; ret->proximityPosition = -1; - ret->maxDepth = INT_MAX; - ret->maxParserDepth = INT_MAX; - #ifdef XP_DEFAULT_CACHE_ON if (xmlXPathContextSetCache(ret, 1, -1, 0) == -1) { xmlXPathFreeContext(ret); @@ -7566,7 
+7567,7 @@ xmlXPathModValues(xmlXPathParserContextPtr ctxt) { CHECK_TYPE(XPATH_NUMBER); arg1 = ctxt->value->floatval; if (arg2 == 0) - ctxt->value->floatval = NAN; + ctxt->value->floatval = xmlXPathNAN; else { ctxt->value->floatval = fmod(arg1, arg2); } @@ -9996,7 +9997,7 @@ xmlXPathStringEvalNumber(const xmlChar *str) { if (cur == NULL) return(0); while (IS_BLANK_CH(*cur)) cur++; if ((*cur != '.') && ((*cur < '0') || (*cur > '9')) && (*cur != '-')) { - return(NAN); + return(xmlXPathNAN); } if (*cur == '-') { isneg = 1; @@ -10032,7 +10033,7 @@ xmlXPathStringEvalNumber(const xmlChar *str) { cur++; if (((*cur < '0') || (*cur > '9')) && (!ok)) { - return(NAN); + return(xmlXPathNAN); } while (*cur == '0') { frac = frac + 1; @@ -10065,7 +10066,7 @@ xmlXPathStringEvalNumber(const xmlChar *str) { } } while (IS_BLANK_CH(*cur)) cur++; - if (*cur != 0) return(NAN); + if (*cur != 0) return(xmlXPathNAN); if (isneg) ret = -ret; if (is_exponent_negative) exponent = -exponent; ret *= pow(10.0, (double)exponent); @@ -10952,9 +10953,13 @@ xmlXPathCompileExpr(xmlXPathParserContextPtr ctxt, int sort) { xmlXPathContextPtr xpctxt = ctxt->context; if (xpctxt != NULL) { - if (xpctxt->depth >= xpctxt->maxParserDepth) + if (xpctxt->depth >= XPATH_MAX_RECURSION_DEPTH) XP_ERROR(XPATH_RECURSION_LIMIT_EXCEEDED); - xpctxt->depth += 1; + /* + * Parsing a single '(' pushes about 10 functions on the call stack + * before recursing! 
+ */ + xpctxt->depth += 10; } xmlXPathCompAndExpr(ctxt); @@ -10980,7 +10985,7 @@ xmlXPathCompileExpr(xmlXPathParserContextPtr ctxt, int sort) { } if (xpctxt != NULL) - xpctxt->depth -= 1; + xpctxt->depth -= 10; } /** @@ -11669,11 +11674,11 @@ xmlXPathNodeSetFilter(xmlXPathParserContextPtr ctxt, res = xmlXPathCompOpEvalToBoolean(ctxt, filterOp, 1); if (ctxt->error != XPATH_EXPRESSION_OK) - goto exit; + break; if (res < 0) { /* Shouldn't happen */ xmlXPathErr(ctxt, XPATH_EXPR_ERROR); - goto exit; + break; } if ((res != 0) && ((pos >= minPos) && (pos <= maxPos))) { @@ -11692,15 +11697,7 @@ xmlXPathNodeSetFilter(xmlXPathParserContextPtr ctxt, if (res != 0) { if (pos == maxPos) { - /* Clear remaining nodes and exit loop. */ - if (hasNsNodes) { - for (i++; i < set->nodeNr; i++) { - node = set->nodeTab[i]; - if ((node != NULL) && - (node->type == XML_NAMESPACE_DECL)) - xmlXPathNodeSetFreeNs((xmlNsPtr) node); - } - } + i += 1; break; } @@ -11708,6 +11705,15 @@ xmlXPathNodeSetFilter(xmlXPathParserContextPtr ctxt, } } + /* Free remaining nodes. */ + if (hasNsNodes) { + for (; i < set->nodeNr; i++) { + xmlNodePtr node = set->nodeTab[i]; + if ((node != NULL) && (node->type == XML_NAMESPACE_DECL)) + xmlXPathNodeSetFreeNs((xmlNsPtr) node); + } + } + set->nodeNr = j; /* If too many elements were removed, shrink table to preserve memory. 
*/ @@ -11728,7 +11734,6 @@ xmlXPathNodeSetFilter(xmlXPathParserContextPtr ctxt, } } -exit: xpctxt->node = oldnode; xpctxt->doc = olddoc; xpctxt->contextSize = oldcs; @@ -11793,11 +11798,11 @@ xmlXPathLocationSetFilter(xmlXPathParserContextPtr ctxt, res = xmlXPathCompOpEvalToBoolean(ctxt, filterOp, 1); if (ctxt->error != XPATH_EXPRESSION_OK) - goto exit; + break; if (res < 0) { /* Shouldn't happen */ xmlXPathErr(ctxt, XPATH_EXPR_ERROR); - goto exit; + break; } if ((res != 0) && ((pos >= minPos) && (pos <= maxPos))) { @@ -11815,10 +11820,7 @@ xmlXPathLocationSetFilter(xmlXPathParserContextPtr ctxt, if (res != 0) { if (pos == maxPos) { - /* Clear remaining nodes and exit loop. */ - for (i++; i < locset->locNr; i++) { - xmlXPathFreeObject(locset->locTab[i]); - } + i += 1; break; } @@ -11826,6 +11828,10 @@ xmlXPathLocationSetFilter(xmlXPathParserContextPtr ctxt, } } + /* Free remaining nodes. */ + for (; i < locset->locNr; i++) + xmlXPathFreeObject(locset->locTab[i]); + locset->locNr = j; /* If too many elements were removed, shrink table to preserve memory. 
*/ @@ -11846,7 +11852,6 @@ xmlXPathLocationSetFilter(xmlXPathParserContextPtr ctxt, } } -exit: xpctxt->node = oldnode; xpctxt->doc = olddoc; xpctxt->contextSize = oldcs; @@ -11884,7 +11889,7 @@ xmlXPathCompOpEvalPredicate(xmlXPathParserContextPtr ctxt, "xmlXPathCompOpEvalPredicate: Expected a predicate\n"); XP_ERROR(XPATH_INVALID_OPERAND); } - if (ctxt->context->depth >= ctxt->context->maxDepth) + if (ctxt->context->depth >= XPATH_MAX_RECURSION_DEPTH) XP_ERROR(XPATH_RECURSION_LIMIT_EXCEEDED); ctxt->context->depth += 1; xmlXPathCompOpEvalPredicate(ctxt, &comp->steps[op->ch1], set, @@ -12600,7 +12605,7 @@ xmlXPathCompOpEvalFirst(xmlXPathParserContextPtr ctxt, CHECK_ERROR0; if (OP_LIMIT_EXCEEDED(ctxt, 1)) return(0); - if (ctxt->context->depth >= ctxt->context->maxDepth) + if (ctxt->context->depth >= XPATH_MAX_RECURSION_DEPTH) XP_ERROR0(XPATH_RECURSION_LIMIT_EXCEEDED); ctxt->context->depth += 1; comp = ctxt->comp; @@ -12741,7 +12746,7 @@ xmlXPathCompOpEvalLast(xmlXPathParserContextPtr ctxt, xmlXPathStepOpPtr op, CHECK_ERROR0; if (OP_LIMIT_EXCEEDED(ctxt, 1)) return(0); - if (ctxt->context->depth >= ctxt->context->maxDepth) + if (ctxt->context->depth >= XPATH_MAX_RECURSION_DEPTH) XP_ERROR0(XPATH_RECURSION_LIMIT_EXCEEDED); ctxt->context->depth += 1; comp = ctxt->comp; @@ -12959,7 +12964,7 @@ xmlXPathCompOpEval(xmlXPathParserContextPtr ctxt, xmlXPathStepOpPtr op) CHECK_ERROR0; if (OP_LIMIT_EXCEEDED(ctxt, 1)) return(0); - if (ctxt->context->depth >= ctxt->context->maxDepth) + if (ctxt->context->depth >= XPATH_MAX_RECURSION_DEPTH) XP_ERROR0(XPATH_RECURSION_LIMIT_EXCEEDED); ctxt->context->depth += 1; comp = ctxt->comp; @@ -13835,7 +13840,8 @@ xmlXPathRunStreamEval(xmlXPathContextPtr ctxt, xmlPatternPtr comp, do { cur = cur->parent; depth--; - if ((cur == NULL) || (cur == limit)) + if ((cur == NULL) || (cur == limit) || + (cur->type == XML_DOCUMENT_NODE)) goto done; if (cur->type == XML_ELEMENT_NODE) { ret = xmlStreamPop(patstream); @@ -14106,8 +14112,7 @@ 
xmlXPathTryStreamCompile(xmlXPathContextPtr ctxt, const xmlChar *str) { } } - stream = xmlPatterncompile(str, dict, XML_PATTERN_XPATH, - &namespaces[0]); + stream = xmlPatterncompile(str, dict, XML_PATTERN_XPATH, namespaces); if (namespaces != NULL) { xmlFree((xmlChar **)namespaces); } @@ -14193,7 +14198,7 @@ xmlXPathOptimizeExpression(xmlXPathParserContextPtr pctxt, /* Recurse */ ctxt = pctxt->context; if (ctxt != NULL) { - if (ctxt->depth >= ctxt->maxDepth) + if (ctxt->depth >= XPATH_MAX_RECURSION_DEPTH) return; ctxt->depth += 1; } @@ -14226,7 +14231,7 @@ xmlXPathCtxtCompile(xmlXPathContextPtr ctxt, const xmlChar *str) { return(comp); #endif - xmlXPathInit(); + xmlInitParser(); pctxt = xmlXPathNewParserContext(str, ctxt); if (pctxt == NULL) @@ -14315,7 +14320,7 @@ xmlXPathCompiledEvalInternal(xmlXPathCompExprPtr comp, if (comp == NULL) return(-1); - xmlXPathInit(); + xmlInitParser(); #ifndef LIBXML_THREAD_ENABLED reentance++; @@ -14460,7 +14465,7 @@ xmlXPathEval(const xmlChar *str, xmlXPathContextPtr ctx) { CHECK_CTXT(ctx) - xmlXPathInit(); + xmlInitParser(); ctxt = xmlXPathNewParserContext(str, ctx); if (ctxt == NULL) diff --git a/third_party/libxml/win32/config.h b/third_party/libxml/win32/config.h index 324b74ff1374..54392f4961e7 100644 --- a/third_party/libxml/win32/config.h +++ b/third_party/libxml/win32/config.h @@ -5,7 +5,8 @@ #define HAVE_STDARG_H #define HAVE_MALLOC_H #define HAVE_ERRNO_H -#define HAVE_STDINT_H +#define SEND_ARG2_CAST +#define GETHOSTBYNAME_ARG_CAST #if defined(_WIN32_WCE) #undef HAVE_ERRNO_H @@ -90,7 +91,7 @@ static int isnan (double d) { #if defined(_MSC_VER) #define mkdir(p,m) _mkdir(p) -#if _MSC_VER < 1900 // Cannot define this in VS 2015 and above! 
+#if _MSC_VER < 1900 #define snprintf _snprintf #endif #if _MSC_VER < 1500 diff --git a/third_party/libxml/win32/include/libxml/xmlversion.h b/third_party/libxml/win32/include/libxml/xmlversion.h index 28ecf084b274..31593eda704a 100644 --- a/third_party/libxml/win32/include/libxml/xmlversion.h +++ b/third_party/libxml/win32/include/libxml/xmlversion.h @@ -29,21 +29,21 @@ XMLPUBFUN void XMLCALL xmlCheckVersion(int version); * * the version string like "1.2.3" */ -#define LIBXML_DOTTED_VERSION "2.9.10" +#define LIBXML_DOTTED_VERSION "2.9.12" /** * LIBXML_VERSION: * * the version number: 1.2.3 value is 10203 */ -#define LIBXML_VERSION 20910 +#define LIBXML_VERSION 20912 /** * LIBXML_VERSION_STRING: * * the version number string, 1.2.3 value is "10203" */ -#define LIBXML_VERSION_STRING "20910" +#define LIBXML_VERSION_STRING "20912" /** * LIBXML_VERSION_EXTRA: @@ -58,7 +58,7 @@ XMLPUBFUN void XMLCALL xmlCheckVersion(int version); * Macro to check that the libxml version in use is compatible with * the version the software has been compiled against */ -#define LIBXML_TEST_VERSION xmlCheckVersion(20910); +#define LIBXML_TEST_VERSION xmlCheckVersion(20912); #ifndef VMS #if 0