From 680fbc6091b7c45d0c40adef04d6bb927157d597 Mon Sep 17 00:00:00 2001
From: bluhm
Date: Wed, 9 Mar 2022 19:22:19 +0000
Subject: [PATCH] Update libexpat to 2.4.7. Relevant for OpenBSD are bug fixes
#572 #577 and other changes #577 #579 #575 #574 #569 #571. No library bump
necessary. tested and OK tb@
---
lib/libexpat/Changes | 34 ++++++++
lib/libexpat/README.md | 2 +-
lib/libexpat/doc/reference.html | 20 +++--
lib/libexpat/lib/expat.h | 22 ++++-
lib/libexpat/lib/xmlparse.c | 147 +++++++++++++++++++++++++++++---
lib/libexpat/tests/runtests.c | 25 ++++--
6 files changed, 223 insertions(+), 27 deletions(-)
diff --git a/lib/libexpat/Changes b/lib/libexpat/Changes
index 40127e1b76f..95f697b39a4 100644
--- a/lib/libexpat/Changes
+++ b/lib/libexpat/Changes
@@ -2,6 +2,40 @@ NOTE: We are looking for help with a few things:
https://github.com/libexpat/libexpat/labels/help%20wanted
If you can help, please get in touch. Thanks!
+Release 2.4.7 Fri March 4 2022
+ Bug fixes:
+ #572 #577 Relax fix to CVE-2022-25236 (introduced with release 2.4.5)
+ with regard to all valid URI characters (RFC 3986),
+ i.e. the following set (excluding whitespace):
+ ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz
+ 0123456789 % -._~ :/?#[]@ !$&'()*+,;=
+
+ Other changes:
+ #555 #570 #581 CMake|Windows: Store Expat version in the DLL
+ #577 Document consequences of namespace separator choices not just
+ in doc/reference.html but also in header
+ #577 Document Expat's lack of validation of namespace URIs against
+ RFC 3986, and that the XML 1.0r4 specification doesn't
+ require Expat to validate namespace URIs, and that Expat
+ may do more in that regard in future releases.
+ If you find need for strict RFC 3986 URI validation on
+ application level today, https://uriparser.github.io/ may
+ be of interest.
+ #579 Fix documentation of XML_EndDoctypeDeclHandler in expat.h
+ #575 Document that a call to XML_FreeContentModel can be done at
+ a later time from outside the element declaration handler
+ #574 Make hardcoded namespace URIs easier to find in code
+ #573 Update documentation on use of XML_POOR_ENTROPY on Solaris
+ #569 #571 tests: Resolve use of macros NAN and INFINITY for GNU G++
+ 4.8.2 on Solaris.
+ #578 #580 Version info bumped from 9:6:8 to 9:7:8;
+ see https://verbump.de/ for what these numbers do
+
+ Special thanks to:
+ Jeffrey Walton
+ Johnny Jazeix
+ Thijs Schreijer
+
Release 2.4.6 Sun February 20 2022
Bug fixes:
#566 Fix a regression introduced by the fix for CVE-2022-25313
diff --git a/lib/libexpat/README.md b/lib/libexpat/README.md
index 959c4a6e94a..6bfbf130dbf 100644
--- a/lib/libexpat/README.md
+++ b/lib/libexpat/README.md
@@ -5,7 +5,7 @@
[![Downloads GitHub](https://img.shields.io/github/downloads/libexpat/libexpat/total?label=Downloads%20GitHub)](https://github.com/libexpat/libexpat/releases)
-# Expat, Release 2.4.6
+# Expat, Release 2.4.7
This is Expat, a C library for parsing XML, started by
[James Clark](https://en.wikipedia.org/wiki/James_Clark_%28programmer%29) in 1997.
diff --git a/lib/libexpat/doc/reference.html b/lib/libexpat/doc/reference.html
index 26db5a63547..87ace02d456 100644
--- a/lib/libexpat/doc/reference.html
+++ b/lib/libexpat/doc/reference.html
@@ -18,6 +18,7 @@
Copyright (c) 2017 Jakub Wilk
Copyright (c) 2021 Tomas Korbar
Copyright (c) 2021 Nicolas Cavallari
+ Copyright (c) 2022 Thijs Schreijer
Licensed under the MIT license:
Permission is hereby granted, free of charge, to any person obtaining
@@ -49,7 +50,7 @@
The Expat XML Parser
- Release 2.4.6
+ Release 2.4.7
@@ -974,6 +975,14 @@ the local part will be concatenated without any separator - this is intended
to support RDF processors. It is a programming error to use the null separator
with
namespace triplets.
+Note:
+Expat does not validate namespace URIs (beyond encoding)
+against RFC 3986 today (and is not required to do so with regard to
+the XML 1.0 namespaces specification) but it may start doing that
+in future releases. Before that, an application using Expat must
+be ready to receive namespace URIs containing non-URI characters.
+
+
XML_ParserCreate_MM
XML_Parser XMLCALL
@@ -1808,10 +1817,11 @@ struct XML_cp {
Sets a handler for element declarations in a DTD. The handler gets
called with the name of the element in the declaration and a pointer
-to a structure that contains the element model. It is the
-application's responsibility to free this data structure using
-XML_FreeContentModel
.
+to a structure that contains the element model. It's the user code's
+responsibility to free model when finished with it. See
+XML_FreeContentModel
.
+There is no need to free the model from the handler, it can be kept
+around and freed at a later stage.
The model
argument is the root of a tree of
XML_Content
nodes. If type
equals
diff --git a/lib/libexpat/lib/expat.h b/lib/libexpat/lib/expat.h
index 46a0e1bcd22..c9214f64070 100644
--- a/lib/libexpat/lib/expat.h
+++ b/lib/libexpat/lib/expat.h
@@ -15,6 +15,7 @@
Copyright (c) 2016 Cristian Rodríguez
Copyright (c) 2016 Thomas Beutlich
Copyright (c) 2017 Rhodri James
+ Copyright (c) 2022 Thijs Schreijer
Licensed under the MIT license:
Permission is hereby granted, free of charge, to any person obtaining
@@ -174,8 +175,10 @@ struct XML_cp {
};
/* This is called for an element declaration. See above for
- description of the model argument. It's the caller's responsibility
- to free model when finished with it.
+ description of the model argument. It's the user code's responsibility
+ to free model when finished with it. See XML_FreeContentModel.
+ There is no need to free the model from the handler, it can be kept
+ around and freed at a later stage.
*/
typedef void(XMLCALL *XML_ElementDeclHandler)(void *userData,
const XML_Char *name,
@@ -237,6 +240,17 @@ XML_ParserCreate(const XML_Char *encoding);
and the local part will be concatenated without any separator.
It is a programming error to use the separator '\0' with namespace
triplets (see XML_SetReturnNSTriplet).
+ If a namespace separator is chosen that can be part of a URI or
+ part of an XML name, splitting an expanded name back into its
+ 1, 2 or 3 original parts on application level in the element handler
+ may end up vulnerable, so these are advised against; sane choices for
+ a namespace separator are e.g. '\n' (line feed) and '|' (pipe).
+
+ Note that Expat does not validate namespace URIs (beyond encoding)
+ against RFC 3986 today (and is not required to do so with regard to
+ the XML 1.0 namespaces specification) but it may start doing that
+ in future releases. Before that, an application using Expat must
+ be ready to receive namespace URIs containing non-URI characters.
*/
XMLPARSEAPI(XML_Parser)
XML_ParserCreateNS(const XML_Char *encoding, XML_Char namespaceSeparator);
@@ -317,7 +331,7 @@ typedef void(XMLCALL *XML_StartDoctypeDeclHandler)(void *userData,
const XML_Char *pubid,
int has_internal_subset);
-/* This is called for the start of the DOCTYPE declaration when the
+/* This is called for the end of the DOCTYPE declaration when the
closing > is encountered, but after processing any external
subset.
*/
@@ -1041,7 +1055,7 @@ XML_SetBillionLaughsAttackProtectionActivationThreshold(
*/
#define XML_MAJOR_VERSION 2
#define XML_MINOR_VERSION 4
-#define XML_MICRO_VERSION 6
+#define XML_MICRO_VERSION 7
#ifdef __cplusplus
}
diff --git a/lib/libexpat/lib/xmlparse.c b/lib/libexpat/lib/xmlparse.c
index 7db28d07acb..05216d997b0 100644
--- a/lib/libexpat/lib/xmlparse.c
+++ b/lib/libexpat/lib/xmlparse.c
@@ -1,4 +1,4 @@
-/* a30d2613dcfdef81475a9d1a349134d2d42722172fdaa7d5bb12ed2aa74b9596 (2.4.6+)
+/* fcb1a62fefa945567301146eb98e3ad3413e823a41c4378e84e8b6b6f308d824 (2.4.7+)
__ __ _
___\ \/ /_ __ __ _| |_
/ _ \\ /| '_ \ / _` | __|
@@ -34,6 +34,7 @@
Copyright (c) 2019 Vadim Zeitlin
Copyright (c) 2021 Dong-hee Na
Copyright (c) 2022 Samanta Navarro
+ Copyright (c) 2022 Jeffrey Walton
Licensed under the MIT license:
Permission is hereby granted, free of charge, to any person obtaining
@@ -133,7 +134,7 @@
* BSD / macOS (including <10.7) (arc4random): HAVE_ARC4RANDOM, \
* libbsd (arc4random_buf): HAVE_ARC4RANDOM_BUF + HAVE_LIBBSD, \
* libbsd (arc4random): HAVE_ARC4RANDOM + HAVE_LIBBSD, \
- * Linux (including <3.17) / BSD / macOS (including <10.7) (/dev/urandom): XML_DEV_URANDOM, \
+ * Linux (including <3.17) / BSD / macOS (including <10.7) / Solaris >=8 (/dev/urandom): XML_DEV_URANDOM, \
* Windows >=Vista (rand_s): _WIN32. \
\
If insist on not using any of these, bypass this error by defining \
@@ -722,6 +723,7 @@ XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep) {
return XML_ParserCreate_MM(encodingName, NULL, tmp);
}
+// "xml=http://www.w3.org/XML/1998/namespace"
static const XML_Char implicitContext[]
= {ASCII_x, ASCII_m, ASCII_l, ASCII_EQUALS, ASCII_h,
ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH,
@@ -3704,12 +3706,124 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr,
return XML_ERROR_NONE;
}
+static XML_Bool
+is_rfc3986_uri_char(XML_Char candidate) {
+ // Returns XML_TRUE if candidate is one of the characters named by the
+ // RFC 3986 ABNF grammar rules listed below, XML_FALSE otherwise; see
+ // https://datatracker.ietf.org/doc/html/rfc3986#appendix-A
+ switch (candidate) {
+ // From rule "ALPHA" (uppercase half)
+ case 'A':
+ case 'B':
+ case 'C':
+ case 'D':
+ case 'E':
+ case 'F':
+ case 'G':
+ case 'H':
+ case 'I':
+ case 'J':
+ case 'K':
+ case 'L':
+ case 'M':
+ case 'N':
+ case 'O':
+ case 'P':
+ case 'Q':
+ case 'R':
+ case 'S':
+ case 'T':
+ case 'U':
+ case 'V':
+ case 'W':
+ case 'X':
+ case 'Y':
+ case 'Z':
+
+ // From rule "ALPHA" (lowercase half)
+ case 'a':
+ case 'b':
+ case 'c':
+ case 'd':
+ case 'e':
+ case 'f':
+ case 'g':
+ case 'h':
+ case 'i':
+ case 'j':
+ case 'k':
+ case 'l':
+ case 'm':
+ case 'n':
+ case 'o':
+ case 'p':
+ case 'q':
+ case 'r':
+ case 's':
+ case 't':
+ case 'u':
+ case 'v':
+ case 'w':
+ case 'x':
+ case 'y':
+ case 'z':
+
+ // From rule "DIGIT"
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+
+ // From rule "pct-encoded"
+ case '%':
+
+ // From rule "unreserved"
+ case '-':
+ case '.':
+ case '_':
+ case '~':
+
+ // From rule "gen-delims"
+ case ':':
+ case '/':
+ case '?':
+ case '#':
+ case '[':
+ case ']':
+ case '@':
+
+ // From rule "sub-delims"
+ case '!':
+ case '$':
+ case '&':
+ case '\'':
+ case '(':
+ case ')':
+ case '*':
+ case '+':
+ case ',':
+ case ';':
+ case '=':
+ return XML_TRUE; // every label above falls through to this one return
+
+ default:
+ return XML_FALSE;
+ }
+}
+
/* addBinding() overwrites the value of prefix->binding without checking.
Therefore one must keep track of the old value outside of addBinding().
*/
static enum XML_Error
addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
const XML_Char *uri, BINDING **bindingsPtr) {
+ // "http://www.w3.org/XML/1998/namespace"
static const XML_Char xmlNamespace[]
= {ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON,
ASCII_SLASH, ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w,
@@ -3720,6 +3834,7 @@ addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
ASCII_e, ASCII_s, ASCII_p, ASCII_a, ASCII_c,
ASCII_e, '\0'};
static const int xmlLen = (int)sizeof(xmlNamespace) / sizeof(XML_Char) - 1;
+ // "http://www.w3.org/2000/xmlns/"
static const XML_Char xmlnsNamespace[]
= {ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH,
ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD, ASCII_w,
@@ -3760,14 +3875,26 @@ addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
&& (len > xmlnsLen || uri[len] != xmlnsNamespace[len]))
isXMLNS = XML_FALSE;
- // NOTE: While Expat does not validate namespace URIs against RFC 3986,
- // we have to at least make sure that the XML processor on top of
- // Expat (that is splitting tag names by namespace separator into
- // 2- or 3-tuples (uri-local or uri-local-prefix)) cannot be confused
- // by an attacker putting additional namespace separator characters
- // into namespace declarations. That would be ambiguous and not to
- // be expected.
- if (parser->m_ns && (uri[len] == parser->m_namespaceSeparator)) {
+ // NOTE: While Expat does not validate namespace URIs against RFC 3986
+ // today (and is not REQUIRED to do so with regard to the XML 1.0
+ // namespaces specification) we have to at least make sure that
+ // the application on top of Expat (that is likely splitting expanded
+ // element names ("qualified names") of form
+ // "[uri sep] local [sep prefix] '\0'" back into 1, 2 or 3 pieces
+ // in its element handler code) cannot be confused by an attacker
+ // putting additional namespace separator characters into namespace
+ // declarations. That would be ambiguous and not to be expected.
+ //
+ // While the HTML API docs of function XML_ParserCreateNS have been
+ // advising against use of a namespace separator character that can
+ // appear in a URI for >20 years now, some widespread applications
+ // are using URI characters (':' (colon) in particular) for a
+ // namespace separator, in practice. To keep these applications
+ // functional, we only reject namespace URIs containing the
+ // application-chosen namespace separator if the chosen separator
+ // is a non-URI character with regard to RFC 3986.
+ if (parser->m_ns && (uri[len] == parser->m_namespaceSeparator)
+ && ! is_rfc3986_uri_char(uri[len])) {
return XML_ERROR_SYNTAX;
}
}
diff --git a/lib/libexpat/tests/runtests.c b/lib/libexpat/tests/runtests.c
index 6d6f66909a1..3309bbaa076 100644
--- a/lib/libexpat/tests/runtests.c
+++ b/lib/libexpat/tests/runtests.c
@@ -54,7 +54,6 @@
#include
#include
#include /* intptr_t uint64_t */
-#include /* NAN, INFINITY, isnan */
#if ! defined(__cplusplus)
# include
@@ -7407,16 +7406,18 @@ START_TEST(test_ns_separator_in_uri) {
struct test_case {
enum XML_Status expectedStatus;
const char *doc;
+ XML_Char namesep;
};
struct test_case cases[] = {
- {XML_STATUS_OK, ""},
- {XML_STATUS_ERROR, ""},
+ {XML_STATUS_OK, "", XCS('\n')},
+ {XML_STATUS_ERROR, "", XCS('\n')},
+ {XML_STATUS_OK, "", XCS(':')},
};
size_t i = 0;
size_t failCount = 0;
for (; i < sizeof(cases) / sizeof(cases[0]); i++) {
- XML_Parser parser = XML_ParserCreateNS(NULL, '\n');
+ XML_Parser parser = XML_ParserCreateNS(NULL, cases[i].namesep);
XML_SetElementHandler(parser, dummy_start_element, dummy_end_element);
if (XML_Parse(parser, cases[i].doc, (int)strlen(cases[i].doc),
/*isFinal*/ XML_TRUE)
@@ -7588,7 +7589,7 @@ START_TEST(test_misc_version) {
fail("Version mismatch");
#if ! defined(XML_UNICODE) || defined(XML_UNICODE_WCHAR_T)
- if (xcstrcmp(version_text, XCS("expat_2.4.6"))) /* needs bump on releases */
+ if (xcstrcmp(version_text, XCS("expat_2.4.7"))) /* needs bump on releases */
fail("XML_*_VERSION in expat.h out of sync?\n");
#else
/* If we have XML_UNICODE defined but not XML_UNICODE_WCHAR_T
@@ -11762,6 +11763,16 @@ START_TEST(test_accounting_precision) {
}
END_TEST
+static float
+portableNAN() { /* parses "nan" with strtof() so the test below need not use the NAN macro */
+ return strtof("nan", NULL);
+}
+
+static float
+portableINFINITY() { /* parses "infinity" with strtof() so the test below need not use the INFINITY macro */
+ return strtof("infinity", NULL);
+}
+
START_TEST(test_billion_laughs_attack_protection_api) {
XML_Parser parserWithoutParent = XML_ParserCreate(NULL);
XML_Parser parserWithParent
@@ -11780,7 +11791,7 @@ START_TEST(test_billion_laughs_attack_protection_api) {
== XML_TRUE)
fail("Call with non-root parser is NOT supposed to succeed");
if (XML_SetBillionLaughsAttackProtectionMaximumAmplification(
- parserWithoutParent, NAN)
+ parserWithoutParent, portableNAN())
== XML_TRUE)
fail("Call with NaN limit is NOT supposed to succeed");
if (XML_SetBillionLaughsAttackProtectionMaximumAmplification(
@@ -11802,7 +11813,7 @@ START_TEST(test_billion_laughs_attack_protection_api) {
== XML_FALSE)
fail("Call with positive limit >=1.0 is supposed to succeed");
if (XML_SetBillionLaughsAttackProtectionMaximumAmplification(
- parserWithoutParent, INFINITY)
+ parserWithoutParent, portableINFINITY())
== XML_FALSE)
fail("Call with positive limit >=1.0 is supposed to succeed");
--
2.20.1