From c033f77099126ed63d6da1f62993cd1af88e58ab Mon Sep 17 00:00:00 2001 From: bluhm Date: Mon, 1 Apr 2024 13:30:41 +0000 Subject: [PATCH] Update libexpat to version 2.6.2. The fix for CVE-2024-28757 has been applied earlier. Relevant for OpenBSD are bug fixes #839 #841, and other change #829. No library bump is necessary. OK deraadt@ --- lib/libexpat/Changes | 78 +++++++++++++++- lib/libexpat/README.md | 2 +- lib/libexpat/doc/reference.html | 7 +- lib/libexpat/lib/expat.h | 5 +- lib/libexpat/lib/internal.h | 17 +++- lib/libexpat/lib/xmlparse.c | 24 +++-- lib/libexpat/tests/basic_tests.c | 151 +++++++++++++++++-------------- lib/libexpat/tests/misc_tests.c | 2 +- 8 files changed, 193 insertions(+), 93 deletions(-) diff --git a/lib/libexpat/Changes b/lib/libexpat/Changes index 48df93dc010..52b366d5dd8 100644 --- a/lib/libexpat/Changes +++ b/lib/libexpat/Changes @@ -1,13 +1,83 @@ -NOTE: We are looking for help with a few things: - https://github.com/libexpat/libexpat/labels/help%20wanted - If you can help, please get in touch. Thanks! - + __ __ _ + ___\ \/ /_ __ __ _| |_ + / _ \\ /| '_ \ / _` | __| + | __// \| |_) | (_| | |_ + \___/_/\_\ .__/ \__,_|\__| + |_| XML parser + +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +!! Expat is UNDERSTAFFED and WITHOUT FUNDING. !! +!! ~~~~~~~~~~~~ !! +!! The following topics need *additional skilled C developers* to progress !! +!! in a timely manner or at all (loosely ordered by descending priority): !! +!! !! +!! - fixing a complex non-public security issue, !! +!! - teaming up on researching and fixing future security reports and !! +!! ClusterFuzz findings with few-days-max response times in communication !! +!! in order to (1) have a sound fix ready before the end of a 90 days !! +!! grace period and (2) in a sustainable manner, !! +!! - implementing and auto-testing XML 1.0r5 support !! +!! (needs discussion before pull requests), !! +!! - smart ideas on fixing the Autotools CMake files generation issue !! +!! without breaking CI (needs discussion before pull requests), !! +!! - the Windows binaries topic (needs requirements engineering first), !! +!! - pushing migration from `int` to `size_t` further !! +!! including edge-cases test coverage (needs discussion before anything). !! +!! !! +!! For details, please reach out via e-mail to sebastian@pipping.org so we !! +!! can schedule a voice call on the topic, in English or German. !! +!! !! +!! THANK YOU! Sebastian Pipping -- Berlin, 2024-03-09 !! +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + +Release 2.6.2 Wed March 13 2024 Security fixes: #839 #842 CVE-2024-28757 -- Prevent billion laughs attacks with isolated use of external parsers. Please see the commit message of commit 1d50b80cf31de87750103656f6eb693746854aa8 for details. + Bug fixes: + #839 #841 Reject direct parameter entity recursion + and avoid the related undefined behavior + + Other changes: + #847 Autotools: Fix build for DOCBOOK_TO_MAN containing spaces + #837 Add missing #821 and #824 to 2.6.1 change log + #838 #843 Version info bumped from 10:1:9 (libexpat*.so.1.9.1) + to 10:2:9 (libexpat*.so.1.9.2); see https://verbump.de/ + for what these numbers do + + Special thanks to: + Philippe Antoine + Tomas Korbar + and + Clang UndefinedBehaviorSanitizer + OSS-Fuzz / ClusterFuzz + +Release 2.6.1 Thu February 29 2024 + Bug fixes: + #817 Make tests independent of CPU speed, and thus more robust + #828 #836 Expose billion laughs API with XML_DTD defined and + XML_GE undefined, regression from 2.6.0 + + Other changes: + #829 Hide test-only code behind new internal macro + #833 Autotools: Reject expat_config.h.in defining SIZEOF_VOID_P + #821 #824 Autotools: Fix "make clean" for case: + ./configure --without-docbook && make clean all + #819 Address compiler warnings + #832 #834 Version info bumped from 10:0:9 (libexpat*.so.1.9.0) + to 10:1:9 (libexpat*.so.1.9.1); see https://verbump.de/ + for what these numbers do + + Infrastructure: + #818 CI: Adapt to breaking changes in clang-format + + Special thanks to: + David Hall + Snild Dolkow + Release 2.6.0 Tue February 6 2024 Security fixes: #789 #814 CVE-2023-52425 -- Fix quadratic runtime issues with big tokens diff --git a/lib/libexpat/README.md b/lib/libexpat/README.md index 43c4f4f3dbb..3c20adbee91 100644 --- a/lib/libexpat/README.md +++ b/lib/libexpat/README.md @@ -5,7 +5,7 @@ [![Downloads GitHub](https://img.shields.io/github/downloads/libexpat/libexpat/total?label=Downloads%20GitHub)](https://github.com/libexpat/libexpat/releases) -# Expat, Release 2.6.0 +# Expat, Release 2.6.2 This is Expat, a C99 library for parsing [XML 1.0 Fourth Edition](https://www.w3.org/TR/2006/REC-xml-20060816/), started by diff --git a/lib/libexpat/doc/reference.html b/lib/libexpat/doc/reference.html index 898f03a3364..5614dc34cdc 100644 --- a/lib/libexpat/doc/reference.html +++ b/lib/libexpat/doc/reference.html @@ -52,7 +52,7 @@

The Expat XML Parser - Release 2.6.0 + Release 2.6.2

@@ -356,10 +356,7 @@ library and header would get installed in

Configuring Expat Using the Pre-Processor

Expat's feature set can be configured using a small number of -pre-processor definitions. The definition of this symbols does not -affect the set of entry points for Expat, only the behavior of the API -and the definition of character types in the case of -XML_UNICODE_WCHAR_T. The symbols are:

+pre-processor definitions. The symbols are:

XML_GE
diff --git a/lib/libexpat/lib/expat.h b/lib/libexpat/lib/expat.h index 95464b0dd17..c2770be3897 100644 --- a/lib/libexpat/lib/expat.h +++ b/lib/libexpat/lib/expat.h @@ -18,6 +18,7 @@ Copyright (c) 2022 Thijs Schreijer Copyright (c) 2023 Hanno Böck Copyright (c) 2023 Sony Corporation / Snild Dolkow + Copyright (c) 2024 Taichi Haradaguchi <20001722@ymail.ne.jp> Licensed under the MIT license: Permission is hereby granted, free of charge, to any person obtaining @@ -1042,7 +1043,7 @@ typedef struct { XMLPARSEAPI(const XML_Feature *) XML_GetFeatureList(void); -#if XML_GE == 1 +#if defined(XML_DTD) || (defined(XML_GE) && XML_GE == 1) /* Added in Expat 2.4.0 for XML_DTD defined and * added in Expat 2.6.0 for XML_GE == 1. */ XMLPARSEAPI(XML_Bool) @@ -1065,7 +1066,7 @@ XML_SetReparseDeferralEnabled(XML_Parser parser, XML_Bool enabled); */ #define XML_MAJOR_VERSION 2 #define XML_MINOR_VERSION 6 -#define XML_MICRO_VERSION 0 +#define XML_MICRO_VERSION 2 #ifdef __cplusplus } diff --git a/lib/libexpat/lib/internal.h b/lib/libexpat/lib/internal.h index cce71e4c516..167ec36804a 100644 --- a/lib/libexpat/lib/internal.h +++ b/lib/libexpat/lib/internal.h @@ -28,10 +28,11 @@ Copyright (c) 2002-2003 Fred L. Drake, Jr. Copyright (c) 2002-2006 Karl Waclawek Copyright (c) 2003 Greg Stein - Copyright (c) 2016-2023 Sebastian Pipping + Copyright (c) 2016-2024 Sebastian Pipping Copyright (c) 2018 Yury Gribov Copyright (c) 2019 David Loffredo - Copyright (c) 2023 Sony Corporation / Snild Dolkow + Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow + Copyright (c) 2024 Taichi Haradaguchi <20001722@ymail.ne.jp> Licensed under the MIT license: Permission is hereby granted, free of charge, to any person obtaining @@ -155,14 +156,20 @@ extern "C" { void _INTERNAL_trim_to_complete_utf8_characters(const char *from, const char **fromLimRef); -#if XML_GE == 1 +#if defined(XML_GE) && XML_GE == 1 unsigned long long testingAccountingGetCountBytesDirect(XML_Parser parser); unsigned long long testingAccountingGetCountBytesIndirect(XML_Parser parser); const char *unsignedCharToPrintable(unsigned char c); #endif -extern XML_Bool g_reparseDeferralEnabledDefault; // written ONLY in runtests.c -extern unsigned int g_parseAttempts; // used for testing only +extern +#if ! defined(XML_TESTING) + const +#endif + XML_Bool g_reparseDeferralEnabledDefault; // written ONLY in runtests.c +#if defined(XML_TESTING) +extern unsigned int g_bytesScanned; // used for testing only +#endif #ifdef __cplusplus } diff --git a/lib/libexpat/lib/xmlparse.c b/lib/libexpat/lib/xmlparse.c index dfaa527af3a..2951fec70c5 100644 --- a/lib/libexpat/lib/xmlparse.c +++ b/lib/libexpat/lib/xmlparse.c @@ -1,4 +1,4 @@ -/* 628e24d4966bedbd4800f6ed128d06d29703765b4bce12d3b7f099f90f842fc9 (2.6.0+) +/* 2a14271ad4d35e82bde8ba210b4edb7998794bcbae54deab114046a300f9639a (2.6.2+) __ __ _ ___\ \/ /_ __ __ _| |_ / _ \\ /| '_ \ / _` | __| @@ -38,7 +38,7 @@ Copyright (c) 2022 Jann Horn Copyright (c) 2022 Sean McBride Copyright (c) 2023 Owain Davies - Copyright (c) 2023 Sony Corporation / Snild Dolkow + Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow Licensed under the MIT license: Permission is hereby granted, free of charge, to any person obtaining @@ -210,7 +210,7 @@ typedef char ICHAR; #endif /* Round up n to be a multiple of sz, where sz is a power of 2. */ -#define ROUND_UP(n, sz) (((n) + ((sz)-1)) & ~((sz)-1)) +#define ROUND_UP(n, sz) (((n) + ((sz) - 1)) & ~((sz) - 1)) /* Do safe (NULL-aware) pointer arithmetic */ #define EXPAT_SAFE_PTR_DIFF(p, q) (((p) && (q)) ? ((p) - (q)) : 0) @@ -248,7 +248,7 @@ static void copy_salt_to_sipkey(XML_Parser parser, struct sipkey *key); it odd, since odd numbers are always relative prime to a power of 2. */ #define SECOND_HASH(hash, mask, power) \ - ((((hash) & ~(mask)) >> ((power)-1)) & ((mask) >> 2)) + ((((hash) & ~(mask)) >> ((power) - 1)) & ((mask) >> 2)) #define PROBE_STEP(hash, mask, power) \ ((unsigned char)((SECOND_HASH(hash, mask, power)) | 1)) @@ -629,8 +629,14 @@ static unsigned long getDebugLevel(const char *variableName, ? 0 \ : ((*((pool)->ptr)++ = c), 1)) -XML_Bool g_reparseDeferralEnabledDefault = XML_TRUE; // write ONLY in runtests.c -unsigned int g_parseAttempts = 0; // used for testing only +#if ! defined(XML_TESTING) +const +#endif + XML_Bool g_reparseDeferralEnabledDefault + = XML_TRUE; // write ONLY in runtests.c +#if defined(XML_TESTING) +unsigned int g_bytesScanned = 0; // used for testing only +#endif struct XML_ParserStruct { /* The first member must be m_userData so that the XML_GetUserData @@ -1017,7 +1023,9 @@ callProcessor(XML_Parser parser, const char *start, const char *end, return XML_ERROR_NONE; } } - g_parseAttempts += 1; +#if defined(XML_TESTING) + g_bytesScanned += (unsigned)have_now; +#endif const enum XML_Error ret = parser->m_processor(parser, start, end, endPtr); if (ret == XML_ERROR_NONE) { // if we consumed nothing, remember what we had on this parse attempt. @@ -6232,7 +6240,7 @@ storeEntityValue(XML_Parser parser, const ENCODING *enc, dtd->keepProcessing = dtd->standalone; goto endEntityValue; } - if (entity->open) { + if (entity->open || (entity == parser->m_declEntity)) { if (enc == parser->m_encoding) parser->m_eventPtr = entityTextPtr; result = XML_ERROR_RECURSIVE_ENTITY_REF; diff --git a/lib/libexpat/tests/basic_tests.c b/lib/libexpat/tests/basic_tests.c index 7112a440187..91c8dd7a392 100644 --- a/lib/libexpat/tests/basic_tests.c +++ b/lib/libexpat/tests/basic_tests.c @@ -1202,6 +1202,49 @@ START_TEST(test_wfc_no_recursive_entity_refs) { } END_TEST +START_TEST(test_recursive_external_parameter_entity_2) { + struct TestCase { + const char *doc; + enum XML_Status expectedStatus; + }; + + struct TestCase cases[] = { + {"", XML_STATUS_ERROR}, + {"" + "", + XML_STATUS_ERROR}, + {"" + "", + XML_STATUS_OK}, + {"", XML_STATUS_OK}, + }; + + for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) { + const char *const doc = cases[i].doc; + const enum XML_Status expectedStatus = cases[i].expectedStatus; + set_subtest("%s", doc); + + XML_Parser parser = XML_ParserCreate(NULL); + assert_true(parser != NULL); + + XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, NULL, NULL); + assert_true(ext_parser != NULL); + + const enum XML_Status actualStatus + = _XML_Parse_SINGLE_BYTES(ext_parser, doc, (int)strlen(doc), XML_TRUE); + + assert_true(actualStatus == expectedStatus); + if (actualStatus != XML_STATUS_OK) { + assert_true(XML_GetErrorCode(ext_parser) + == XML_ERROR_RECURSIVE_ENTITY_REF); + } + + XML_ParserFree(ext_parser); + XML_ParserFree(parser); + } +} +END_TEST + /* Test incomplete external entities are faulted */ START_TEST(test_ext_entity_invalid_parse) { const char *text = ". " - "Please keep increasing the value by 1 until it reliably passes the " - "test on your hardware and open a bug sharing that number with us. " - "Thanks in advance!"; +START_TEST(test_big_tokens_scale_linearly) { const struct { const char *pre; const char *post; @@ -5220,65 +5257,57 @@ START_TEST(test_big_tokens_take_linear_time) { {"<", "/>"}, // big elem name, used to be O(N²) }; const int num_cases = sizeof(text) / sizeof(text[0]); - // For the test we need a value that is: - // (1) big enough that the test passes reliably (avoiding flaky tests), and - // (2) small enough that the test actually catches regressions. - const int max_slowdown = 15; char aaaaaa[4096]; const int fillsize = (int)sizeof(aaaaaa); const int fillcount = 100; + const unsigned approx_bytes = fillsize * fillcount; // ignore pre/post. + const unsigned max_factor = 4; + const unsigned max_scanned = max_factor * approx_bytes; memset(aaaaaa, 'a', fillsize); if (! g_reparseDeferralEnabledDefault) { return; // heuristic is disabled; we would get O(n^2) and fail. } -#if ! defined(__linux__) - if (CLOCKS_PER_SEC < 100000) { - // Skip this test if clock() doesn't have reasonably good resolution. - // This workaround is primarily targeting Windows and FreeBSD, since - // XSI requires the value to be 1.000.000 (10x the condition here), and - // we want to be very sure that at least one platform in CI can catch - // regressions (through a failing test). - return; - } -#endif - clock_t baseline = 0; for (int i = 0; i < num_cases; ++i) { XML_Parser parser = XML_ParserCreate(NULL); assert_true(parser != NULL); enum XML_Status status; - set_subtest("max_slowdown=%d text=\"%saaaaaa%s\"", max_slowdown, - text[i].pre, text[i].post); - const clock_t start = clock(); + set_subtest("text=\"%saaaaaa%s\"", text[i].pre, text[i].post); // parse the start text + g_bytesScanned = 0; status = _XML_Parse_SINGLE_BYTES(parser, text[i].pre, (int)strlen(text[i].pre), XML_FALSE); if (status != XML_STATUS_OK) { xml_failure(parser); } + // parse lots of 'a', failing the test early if it takes too long + unsigned past_max_count = 0; for (int f = 0; f < fillcount; ++f) { status = _XML_Parse_SINGLE_BYTES(parser, aaaaaa, fillsize, XML_FALSE); if (status != XML_STATUS_OK) { xml_failure(parser); } - // i == 0 means we're still calculating the baseline value - if (i > 0) { - const clock_t now = clock(); - const clock_t clocks_so_far = now - start; - const int slowdown = clocks_so_far / baseline; - if (slowdown >= max_slowdown) { - fprintf( - stderr, - "fill#%d: clocks_so_far=%d baseline=%d slowdown=%d max_slowdown=%d\n", - f, (int)clocks_so_far, (int)baseline, slowdown, max_slowdown); - fail(too_slow_failure_message); - } + if (g_bytesScanned > max_scanned) { + // We're not done, and have already passed the limit -- the test will + // definitely fail. This block allows us to save time by failing early. + const unsigned pushed + = (unsigned)strlen(text[i].pre) + (f + 1) * fillsize; + fprintf( + stderr, + "after %d/%d loops: pushed=%u scanned=%u (factor ~%.2f) max_scanned: %u (factor ~%u)\n", + f + 1, fillcount, pushed, g_bytesScanned, + g_bytesScanned / (double)pushed, max_scanned, max_factor); + past_max_count++; + // We are failing, but allow a few log prints first. If we don't reach + // a count of five, the test will fail after the loop instead. + assert_true(past_max_count < 5); } } + // parse the end text status = _XML_Parse_SINGLE_BYTES(parser, text[i].post, (int)strlen(text[i].post), XML_TRUE); @@ -5286,18 +5315,14 @@ START_TEST(test_big_tokens_take_linear_time) { xml_failure(parser); } - // how long did it take in total? - const clock_t end = clock(); - const clock_t taken = end - start; - if (i == 0) { - assert_true(taken > 0); // just to make sure we don't div-by-0 later - baseline = taken; - } - const int slowdown = taken / baseline; - if (slowdown >= max_slowdown) { - fprintf(stderr, "taken=%d baseline=%d slowdown=%d max_slowdown=%d\n", - (int)taken, (int)baseline, slowdown, max_slowdown); - fail(too_slow_failure_message); + assert_true(g_bytesScanned > approx_bytes); // or the counter isn't working + if (g_bytesScanned > max_scanned) { + fprintf( + stderr, + "after all input: scanned=%u (factor ~%.2f) max_scanned: %u (factor ~%u)\n", + g_bytesScanned, g_bytesScanned / (double)approx_bytes, max_scanned, + max_factor); + fail("scanned too many bytes"); } XML_ParserFree(parser); @@ -5774,19 +5799,17 @@ START_TEST(test_varying_buffer_fills) { fillsize[2], fillsize[3]); XML_Parser parser = XML_ParserCreate(NULL); assert_true(parser != NULL); - g_parseAttempts = 0; CharData storage; CharData_Init(&storage); XML_SetUserData(parser, &storage); XML_SetStartElementHandler(parser, start_element_event_handler); + g_bytesScanned = 0; int worstcase_bytes = 0; // sum of (buffered bytes at each XML_Parse call) - int scanned_bytes = 0; // sum of (buffered bytes at each actual parse) int offset = 0; while (*fillsize >= 0) { assert_true(offset + *fillsize <= document_length); // or test is invalid - const unsigned attempts_before = g_parseAttempts; const enum XML_Status status = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE); if (status != XML_STATUS_OK) { @@ -5796,28 +5819,20 @@ START_TEST(test_varying_buffer_fills) { fillsize++; assert_true(offset <= INT_MAX - worstcase_bytes); // avoid overflow worstcase_bytes += offset; // we might've tried to parse all pending bytes - if (g_parseAttempts != attempts_before) { - assert_true(g_parseAttempts == attempts_before + 1); // max 1/XML_Parse - assert_true(offset <= INT_MAX - scanned_bytes); // avoid overflow - scanned_bytes += offset; // we *did* try to parse all pending bytes - } } assert_true(storage.count == 1); // the big token should've been parsed - assert_true(scanned_bytes > 0); // test-the-test: does our counter work? + assert_true(g_bytesScanned > 0); // test-the-test: does our counter work? if (g_reparseDeferralEnabledDefault) { // heuristic is enabled; some XML_Parse calls may have deferred reparsing - const int max_bytes_scanned = -*fillsize; - if (scanned_bytes > max_bytes_scanned) { + const unsigned max_bytes_scanned = -*fillsize; + if (g_bytesScanned > max_bytes_scanned) { fprintf(stderr, - "bytes scanned in parse attempts: actual=%d limit=%d \n", - scanned_bytes, max_bytes_scanned); + "bytes scanned in parse attempts: actual=%u limit=%u \n", + g_bytesScanned, max_bytes_scanned); fail("too many bytes scanned in parse attempts"); } - assert_true(scanned_bytes <= worstcase_bytes); - } else { - // heuristic is disabled; every XML_Parse() will have reparsed - assert_true(scanned_bytes == worstcase_bytes); } + assert_true(g_bytesScanned <= (unsigned)worstcase_bytes); XML_ParserFree(parser); } @@ -5972,6 +5987,8 @@ make_basic_test_case(Suite *s) { tcase_add_test__ifdef_xml_dtd(tc_basic, test_skipped_parameter_entity); tcase_add_test__ifdef_xml_dtd(tc_basic, test_recursive_external_parameter_entity); + tcase_add_test__ifdef_xml_dtd(tc_basic, + test_recursive_external_parameter_entity_2); tcase_add_test(tc_basic, test_undefined_ext_entity_in_external_dtd); tcase_add_test(tc_basic, test_suspend_xdecl); tcase_add_test(tc_basic, test_abort_epilog); @@ -6065,7 +6082,7 @@ make_basic_test_case(Suite *s) { tcase_add_test__ifdef_xml_dtd(tc_basic, test_pool_integrity_with_unfinished_attr); tcase_add_test__if_xml_ge(tc_basic, test_nested_entity_suspend); - tcase_add_test(tc_basic, test_big_tokens_take_linear_time); + tcase_add_test(tc_basic, test_big_tokens_scale_linearly); tcase_add_test(tc_basic, test_set_reparse_deferral); tcase_add_test(tc_basic, test_reparse_deferral_is_inherited); tcase_add_test(tc_basic, test_set_reparse_deferral_on_null_parser); diff --git a/lib/libexpat/tests/misc_tests.c b/lib/libexpat/tests/misc_tests.c index b5212f58a5b..ffde056310e 100644 --- a/lib/libexpat/tests/misc_tests.c +++ b/lib/libexpat/tests/misc_tests.c @@ -208,7 +208,7 @@ START_TEST(test_misc_version) { if (! versions_equal(&read_version, &parsed_version)) fail("Version mismatch"); - if (xcstrcmp(version_text, XCS("expat_2.6.0"))) /* needs bump on releases */ + if (xcstrcmp(version_text, XCS("expat_2.6.2"))) /* needs bump on releases */ fail("XML_*_VERSION in expat.h out of sync?\n"); } END_TEST -- 2.20.1