textproc/libxml2: sync with upstream

Taken from: HardenedBSD
This commit is contained in:
Franco Fichtner 2021-06-22 11:40:09 +02:00
parent c305dffbb0
commit cdc8873fd3
13 changed files with 379 additions and 332 deletions

View File

@ -1,8 +1,8 @@
# Created by: Yukihiro Nakai <Nakai@technologist.com>
PORTNAME= libxml2
DISTVERSION= 2.9.10
PORTREVISION?= 4
DISTVERSION= 2.9.12
#PORTREVISION?= 0
CATEGORIES?= textproc gnome
MASTER_SITES= http://xmlsoft.org/sources/
DIST_SUBDIR= gnome2
@ -17,37 +17,42 @@ LICENSE_FILE_MIT= ${WRKSRC}/COPYING
LICENSE_FILE_TRIO= ${FILESDIR}/LICENSE.TRIO
LICENSE_PERMS_TRIO= dist-mirror dist-sell pkg-mirror pkg-sell auto-accept
GNU_CONFIGURE= yes
USES+= compiler cpe iconv libtool pathfix pkgconfig shebangfix
GNU_CONFIGURE= yes
CPE_VENDOR= xmlsoft
SHEBANG_FILES= *.py */*.py */*/*.py
USE_LDCONFIG= yes
INSTALL_TARGET= install-strip
TEST_TARGET= check
CONFIGURE_ARGS?=--with-iconv=${ICONV_PREFIX} \
--with-html-dir=${PREFIX}/share/doc \
--with-html-subdir=${PORTNAME} \
--without-icu \
--with-lzma=/usr \
--without-python
INSTALL_TARGET= install-strip
SHEBANG_FILES= *.py */*.py */*/*.py
PLIST_SUB+= LIBVERSION=${PORTVERSION}
PLIST_SUB+= LIBVERSION=${DISTVERSION}
.if !defined(MASTERDIR)
OPTIONS_DEFINE= SCHEMA VALIDATION THREADS MEM_DEBUG XMLLINT_HIST THREAD_ALLOC
OPTIONS_DEFAULT=SCHEMA VALIDATION THREADS
SCHEMA_DESC= XML schema support
SCHEMA_CONFIGURE_WITH= schemas
VALIDATION_DESC= Validation support
VALIDATION_CONFIGURE_OFF= --without-valid
THREADS_DESC= Threads support
THREADS_CONFIGURE_WITH= threads
MEM_DEBUG_DESC= Memory debugging (DEVELOPERS ONLY!)
MEM_DEBUG_CONFIGURE_WITH= mem-debug
XMLLINT_HIST_DESC= History for xmllint
XMLLINT_HIST_CONFIGURE_WITH= history
OPTIONS_DEFINE= MEM_DEBUG SCHEMA THREAD_ALLOC THREADS VALIDATION \
XMLLINT_HIST
OPTIONS_DEFAULT= SCHEMA VALIDATION THREADS
MEM_DEBUG_DESC= Memory debugging (DEVELOPERS ONLY!)
SCHEMA_DESC= XML schema support
THREAD_ALLOC_DESC= Per-thread memory (DEVELOPERS ONLY!)
VALIDATION_DESC= Validation support
XMLLINT_HIST_DESC= History for xmllint
MEM_DEBUG_CONFIGURE_WITH= mem-debug
SCHEMA_CONFIGURE_WITH= schemas
THREAD_ALLOC_CONFIGURE_WITH= thread-alloc
THREADS_CONFIGURE_WITH= threads
VALIDATION_CONFIGURE_OFF= --without-valid
XMLLINT_HIST_CONFIGURE_WITH= history
.endif # !defined(MASTERDIR)

View File

@ -1,11 +1,3 @@
TIMESTAMP = 1602549798
SHA256 (gnome2/libxml2-2.9.10.tar.gz) = aafee193ffb8fe0c82d4afef6ef91972cbaf5feea100edc2f262750611b4be1f
SIZE (gnome2/libxml2-2.9.10.tar.gz) = 5624761
SHA256 (gnome2/7ffcd44d7e6c46704f8af0321d9314cd26e0e18a.patch) = 8bab1a7fcc22a8f9a3f89648660bbca424196d82967e213bd27c1dcc9a9544a5
SIZE (gnome2/7ffcd44d7e6c46704f8af0321d9314cd26e0e18a.patch) = 1015
SHA256 (gnome2/0e1a49c8907645d2e155f0d89d4d9895ac5112b5.patch) = 4a1dca36e762a0e2affb0779918fbf1665a00d984ffbd3efa45d3d202f87ea8c
SIZE (gnome2/0e1a49c8907645d2e155f0d89d4d9895ac5112b5.patch) = 996
SHA256 (gnome2/50f06b3efb638efb0abd95dc62dca05ae67882c2.patch) = 701048e726e2f3f7f2a71a7054030fc154b5edace72e23c5934ecd9ee09ad811
SIZE (gnome2/50f06b3efb638efb0abd95dc62dca05ae67882c2.patch) = 1052
SHA256 (gnome2/edc7b6abb0c125eeb888748c334897f60aab0854.patch) = eac708cc0bcb19c59c63874e5518f9084b177c8a10981539d90ba41d9e8414a1
SIZE (gnome2/edc7b6abb0c125eeb888748c334897f60aab0854.patch) = 3019
TIMESTAMP = 1622963062
SHA256 (gnome2/libxml2-2.9.12.tar.gz) = c8d6681e38c56f172892c85ddc0852e1fd4b53b4209e7f4ebf17f7e2eae71d92
SIZE (gnome2/libxml2-2.9.12.tar.gz) = 5681632

View File

@ -1,33 +0,0 @@
From 7ffcd44d7e6c46704f8af0321d9314cd26e0e18a Mon Sep 17 00:00:00 2001
From: Zhipeng Xie <xiezhipeng1@huawei.com>
Date: Tue, 20 Aug 2019 16:33:06 +0800
Subject: [PATCH] Fix memory leak in xmlSchemaValidateStream
When ctxt->schema is NULL, xmlSchemaSAXPlug->xmlSchemaPreRun
allocates a new schema for ctxt->schema and sets vctxt->xsiAssemble
to 1. Then xmlSchemaVStart->xmlSchemaPreRun resets
vctxt->xsiAssemble to 0 again, which means the allocated schema
can no longer be freed.
Found with libFuzzer.
Signed-off-by: Zhipeng Xie <xiezhipeng1@huawei.com>
---
xmlschemas.c | 1 -
1 file changed, 1 deletion(-)
diff --git a/xmlschemas.c b/xmlschemas.c
index 301c8449..39d92182 100644
--- xmlschemas.c
+++ xmlschemas.c
@@ -28090,7 +28090,6 @@ xmlSchemaPreRun(xmlSchemaValidCtxtPtr vctxt) {
vctxt->nberrors = 0;
vctxt->depth = -1;
vctxt->skipDepth = -1;
- vctxt->xsiAssemble = 0;
vctxt->hasKeyrefs = 0;
#ifdef ENABLE_IDC_NODE_TABLES_TEST
vctxt->createIDCNodeTables = 1;
--
GitLab

View File

@ -1,36 +0,0 @@
From 50f06b3efb638efb0abd95dc62dca05ae67882c2 Mon Sep 17 00:00:00 2001
From: Nick Wellnhofer <wellnhofer@aevum.de>
Date: Fri, 7 Aug 2020 21:54:27 +0200
Subject: [PATCH] Fix out-of-bounds read with 'xmllint --htmlout'
Make sure that truncated UTF-8 sequences don't cause an out-of-bounds
array access.
Thanks to @SuhwanSong and the Agency for Defense Development (ADD) for
the report.
Fixes #178.
---
xmllint.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/xmllint.c b/xmllint.c
index f6a8e463..c647486f 100644
--- xmllint.c
+++ xmllint.c
@@ -528,6 +528,12 @@ static void
xmlHTMLEncodeSend(void) {
char *result;
+ /*
+ * xmlEncodeEntitiesReentrant assumes valid UTF-8, but the buffer might
+ * end with a truncated UTF-8 sequence. This is a hack to at least avoid
+ * an out-of-bounds read.
+ */
+ memset(&buffer[sizeof(buffer)-4], 0, 4);
result = (char *) xmlEncodeEntitiesReentrant(NULL, BAD_CAST buffer);
if (result) {
xmlGenericError(xmlGenericErrorContext, "%s", result);
--
GitLab

View File

@ -1,32 +0,0 @@
From 0e1a49c8907645d2e155f0d89d4d9895ac5112b5 Mon Sep 17 00:00:00 2001
From: Zhipeng Xie <xiezhipeng1@huawei.com>
Date: Thu, 12 Dec 2019 17:30:55 +0800
Subject: [PATCH] Fix infinite loop in xmlStringLenDecodeEntities
When ctxt->instate == XML_PARSER_EOF, xmlParseStringEntityRef
returns NULL, which causes an infinite loop in xmlStringLenDecodeEntities.
Found with libFuzzer.
Signed-off-by: Zhipeng Xie <xiezhipeng1@huawei.com>
---
parser.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/parser.c b/parser.c
index d1c31963..a34bb6cd 100644
--- parser.c
+++ parser.c
@@ -2646,7 +2646,8 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
else
c = 0;
while ((c != 0) && (c != end) && /* non input consuming loop */
- (c != end2) && (c != end3)) {
+ (c != end2) && (c != end3) &&
+ (ctxt->instate != XML_PARSER_EOF)) {
if (c == 0) break;
if ((c == '&') && (str[1] == '#')) {
--
GitLab

View File

@ -1,67 +0,0 @@
From 8598060bacada41a0eb09d95c97744ff4e428f8e Mon Sep 17 00:00:00 2001
From: Daniel Veillard <veillard@redhat.com>
Date: Thu, 13 May 2021 14:55:12 +0200
Subject: [PATCH] Patch for security issue CVE-2021-3541
This is related to parameter entities expansion and following
the line of the billion laugh attack. Somehow in that path the
counting of parameters was missed and the normal algorithm based
on entities "density" was useless.
---
parser.c | 26 ++++++++++++++++++++++++++
1 file changed, 26 insertions(+)
diff --git parser.c parser.c
index f5e5e169..c9312fa4 100644
--- parser.c
+++ parser.c
@@ -140,6 +140,7 @@ xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
xmlEntityPtr ent, size_t replacement)
{
size_t consumed = 0;
+ int i;
if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
return (0);
@@ -177,6 +178,28 @@ xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
rep = NULL;
}
}
+
+ /*
+ * Prevent entity exponential check, not just replacement while
+ * parsing the DTD
+ * The check is potentially costly so do that only once in a thousand
+ */
+ if ((ctxt->instate == XML_PARSER_DTD) && (ctxt->nbentities > 10000) &&
+ (ctxt->nbentities % 1024 == 0)) {
+ for (i = 0;i < ctxt->inputNr;i++) {
+ consumed += ctxt->inputTab[i]->consumed +
+ (ctxt->inputTab[i]->cur - ctxt->inputTab[i]->base);
+ }
+ if (ctxt->nbentities > consumed * XML_PARSER_NON_LINEAR) {
+ xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
+ ctxt->instate = XML_PARSER_EOF;
+ return (1);
+ }
+ consumed = 0;
+ }
+
+
+
if (replacement != 0) {
if (replacement < XML_MAX_TEXT_LENGTH)
return(0);
@@ -7963,6 +7986,9 @@ xmlParsePEReference(xmlParserCtxtPtr ctxt)
xmlChar start[4];
xmlCharEncoding enc;
+ if (xmlParserEntityCheck(ctxt, 0, entity, 0))
+ return;
+
if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
((ctxt->options & XML_PARSE_NOENT) == 0) &&
((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
--
2.31.1

View File

@ -1,6 +1,24 @@
--- Makefile.in.orig 2019-11-16 14:42:34 UTC
--- Makefile.in.orig 2021-06-09 19:53:33 UTC
+++ Makefile.in
@@ -1284,7 +1284,7 @@ am--depfiles: $(am__depfiles_remade)
@@ -760,7 +760,7 @@ man_MANS = xml2-config.1 libxml.3
m4datadir = $(datadir)/aclocal
m4data_DATA = libxml.m4
runtest_SOURCES = runtest.c
-runtest_LDFLAGS =
+runtest_LDFLAGS = -pthread
runtest_DEPENDENCIES = $(DEPS)
runtest_LDADD = $(BASE_THREAD_LIBS) $(RDL_LIBS) $(LDADDS)
testrecurse_SOURCES = testrecurse.c
@@ -808,7 +808,7 @@ testC14N_LDFLAGS =
testC14N_DEPENDENCIES = $(DEPS)
testC14N_LDADD = $(LDADDS)
testThreads_SOURCES = testThreads.c
-testThreads_LDFLAGS =
+testThreads_LDFLAGS = -pthread
testThreads_DEPENDENCIES = $(DEPS)
testThreads_LDADD = $(BASE_THREAD_LIBS) $(LDADDS)
testURI_SOURCES = testURI.c
@@ -1285,7 +1285,7 @@ am--depfiles: $(am__depfiles_remade)
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
.c.lo:
@ -9,7 +27,7 @@
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@@ -1773,7 +1773,7 @@ check-am: all-am
@@ -1774,7 +1774,7 @@ check-am: all-am
$(MAKE) $(AM_MAKEFLAGS) $(check_PROGRAMS)
check: $(BUILT_SOURCES)
$(MAKE) $(AM_MAKEFLAGS) check-recursive
@ -18,7 +36,7 @@
config.h
install-binPROGRAMS: install-libLTLIBRARIES
@@ -1910,7 +1910,7 @@ info: info-recursive
@@ -1911,7 +1911,7 @@ info: info-recursive
info-am:

View File

@ -1,92 +0,0 @@
From edc7b6abb0c125eeb888748c334897f60aab0854 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Miro=20Hron=C4=8Dok?= <miro@hroncok.cz>
Date: Fri, 28 Feb 2020 12:48:14 +0100
Subject: [PATCH] Parenthesize Py<type>_Check() in ifs
In C, if expressions should be parenthesized.
PyLong_Check, PyUnicode_Check etc. happened to expand to a parenthesized
expression before, but that's not API to rely on.
Since Python 3.9.0a4 it needs to be parenthesized explicitly.
Fixes https://gitlab.gnome.org/GNOME/libxml2/issues/149
---
python/libxml.c | 4 ++--
python/types.c | 12 ++++++------
2 files changed, 8 insertions(+), 8 deletions(-)
diff --git a/python/libxml.c b/python/libxml.c
index bc676c4e..81e709f3 100644
--- python/libxml.c
+++ python/libxml.c
@@ -294,7 +294,7 @@ xmlPythonFileReadRaw (void * context, char * buffer, int len) {
lenread = PyBytes_Size(ret);
data = PyBytes_AsString(ret);
#ifdef PyUnicode_Check
- } else if PyUnicode_Check (ret) {
+ } else if (PyUnicode_Check (ret)) {
#if PY_VERSION_HEX >= 0x03030000
Py_ssize_t size;
const char *tmp;
@@ -359,7 +359,7 @@ xmlPythonFileRead (void * context, char * buffer, int len) {
lenread = PyBytes_Size(ret);
data = PyBytes_AsString(ret);
#ifdef PyUnicode_Check
- } else if PyUnicode_Check (ret) {
+ } else if (PyUnicode_Check (ret)) {
#if PY_VERSION_HEX >= 0x03030000
Py_ssize_t size;
const char *tmp;
diff --git a/python/types.c b/python/types.c
index c2bafeb1..ed284ec7 100644
--- python/types.c
+++ python/types.c
@@ -602,16 +602,16 @@ libxml_xmlXPathObjectPtrConvert(PyObject *obj)
if (obj == NULL) {
return (NULL);
}
- if PyFloat_Check (obj) {
+ if (PyFloat_Check (obj)) {
ret = xmlXPathNewFloat((double) PyFloat_AS_DOUBLE(obj));
- } else if PyLong_Check(obj) {
+ } else if (PyLong_Check(obj)) {
#ifdef PyLong_AS_LONG
ret = xmlXPathNewFloat((double) PyLong_AS_LONG(obj));
#else
ret = xmlXPathNewFloat((double) PyInt_AS_LONG(obj));
#endif
#ifdef PyBool_Check
- } else if PyBool_Check (obj) {
+ } else if (PyBool_Check (obj)) {
if (obj == Py_True) {
ret = xmlXPathNewBoolean(1);
@@ -620,14 +620,14 @@ libxml_xmlXPathObjectPtrConvert(PyObject *obj)
ret = xmlXPathNewBoolean(0);
}
#endif
- } else if PyBytes_Check (obj) {
+ } else if (PyBytes_Check (obj)) {
xmlChar *str;
str = xmlStrndup((const xmlChar *) PyBytes_AS_STRING(obj),
PyBytes_GET_SIZE(obj));
ret = xmlXPathWrapString(str);
#ifdef PyUnicode_Check
- } else if PyUnicode_Check (obj) {
+ } else if (PyUnicode_Check (obj)) {
#if PY_VERSION_HEX >= 0x03030000
xmlChar *str;
const char *tmp;
@@ -650,7 +650,7 @@ libxml_xmlXPathObjectPtrConvert(PyObject *obj)
ret = xmlXPathWrapString(str);
#endif
#endif
- } else if PyList_Check (obj) {
+ } else if (PyList_Check (obj)) {
int i;
PyObject *node;
xmlNodePtr cur;
--
GitLab

View File

@ -0,0 +1,211 @@
From 85b1792e37b131e7a51af98a37f92472e8de5f3f Mon Sep 17 00:00:00 2001
From: Nick Wellnhofer <wellnhofer@aevum.de>
Date: Tue, 18 May 2021 20:08:28 +0200
Subject: [PATCH] Work around lxml API abuse
Make xmlNodeDumpOutput and htmlNodeDumpFormatOutput work with corrupted
parent pointers. This used to work with the old recursive code but the
non-recursive rewrite required parent pointers to be set correctly.
Unfortunately, lxml relies on the old behavior and passes subtrees with
a corrupted structure. Fall back to a recursive function call if an
invalid parent pointer is detected.
Fixes #255.
---
HTMLtree.c | 46 ++++++++++++++++++++++++++++------------------
xmlsave.c | 31 +++++++++++++++++++++----------
2 files changed, 49 insertions(+), 28 deletions(-)
diff --git a/HTMLtree.c b/HTMLtree.c
index 24434d45..bdd639c7 100644
--- HTMLtree.c
+++ HTMLtree.c
@@ -744,7 +744,7 @@ void
htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
xmlNodePtr cur, const char *encoding ATTRIBUTE_UNUSED,
int format) {
- xmlNodePtr root;
+ xmlNodePtr root, parent;
xmlAttrPtr attr;
const htmlElemDesc * info;
@@ -755,6 +755,7 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
}
root = cur;
+ parent = cur->parent;
while (1) {
switch (cur->type) {
case XML_HTML_DOCUMENT_NODE:
@@ -762,13 +763,25 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
if (((xmlDocPtr) cur)->intSubset != NULL) {
htmlDtdDumpOutput(buf, (xmlDocPtr) cur, NULL);
}
- if (cur->children != NULL) {
+ /* Always validate cur->parent when descending. */
+ if ((cur->parent == parent) && (cur->children != NULL)) {
+ parent = cur;
cur = cur->children;
continue;
}
break;
case XML_ELEMENT_NODE:
+ /*
+ * Some users like lxml are known to pass nodes with a corrupted
+ * tree structure. Fall back to a recursive call to handle this
+ * case.
+ */
+ if ((cur->parent != parent) && (cur->children != NULL)) {
+ htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
+ break;
+ }
+
/*
* Get specific HTML info for that node.
*/
@@ -817,6 +830,7 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
(cur->name != NULL) &&
(cur->name[0] != 'p')) /* p, pre, param */
xmlOutputBufferWriteString(buf, "\n");
+ parent = cur;
cur = cur->children;
continue;
}
@@ -825,9 +839,9 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
(info != NULL) && (!info->isinline)) {
if ((cur->next->type != HTML_TEXT_NODE) &&
(cur->next->type != HTML_ENTITY_REF_NODE) &&
- (cur->parent != NULL) &&
- (cur->parent->name != NULL) &&
- (cur->parent->name[0] != 'p')) /* p, pre, param */
+ (parent != NULL) &&
+ (parent->name != NULL) &&
+ (parent->name[0] != 'p')) /* p, pre, param */
xmlOutputBufferWriteString(buf, "\n");
}
@@ -842,9 +856,9 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
break;
if (((cur->name == (const xmlChar *)xmlStringText) ||
(cur->name != (const xmlChar *)xmlStringTextNoenc)) &&
- ((cur->parent == NULL) ||
- ((xmlStrcasecmp(cur->parent->name, BAD_CAST "script")) &&
- (xmlStrcasecmp(cur->parent->name, BAD_CAST "style"))))) {
+ ((parent == NULL) ||
+ ((xmlStrcasecmp(parent->name, BAD_CAST "script")) &&
+ (xmlStrcasecmp(parent->name, BAD_CAST "style"))))) {
xmlChar *buffer;
buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
@@ -902,13 +916,9 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
break;
}
- /*
- * The parent should never be NULL here but we want to handle
- * corrupted documents gracefully.
- */
- if (cur->parent == NULL)
- return;
- cur = cur->parent;
+ cur = parent;
+ /* cur->parent was validated when descending. */
+ parent = cur->parent;
if ((cur->type == XML_HTML_DOCUMENT_NODE) ||
(cur->type == XML_DOCUMENT_NODE)) {
@@ -939,9 +949,9 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
(cur->next != NULL)) {
if ((cur->next->type != HTML_TEXT_NODE) &&
(cur->next->type != HTML_ENTITY_REF_NODE) &&
- (cur->parent != NULL) &&
- (cur->parent->name != NULL) &&
- (cur->parent->name[0] != 'p')) /* p, pre, param */
+ (parent != NULL) &&
+ (parent->name != NULL) &&
+ (parent->name[0] != 'p')) /* p, pre, param */
xmlOutputBufferWriteString(buf, "\n");
}
}
diff --git a/xmlsave.c b/xmlsave.c
index 61a40459..aedbd5e7 100644
--- xmlsave.c
+++ xmlsave.c
@@ -847,7 +847,7 @@ htmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) {
static void
xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) {
int format = ctxt->format;
- xmlNodePtr tmp, root, unformattedNode = NULL;
+ xmlNodePtr tmp, root, unformattedNode = NULL, parent;
xmlAttrPtr attr;
xmlChar *start, *end;
xmlOutputBufferPtr buf;
@@ -856,6 +856,7 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) {
buf = ctxt->buf;
root = cur;
+ parent = cur->parent;
while (1) {
switch (cur->type) {
case XML_DOCUMENT_NODE:
@@ -868,7 +869,9 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) {
break;
case XML_DOCUMENT_FRAG_NODE:
- if (cur->children != NULL) {
+ /* Always validate cur->parent when descending. */
+ if ((cur->parent == parent) && (cur->children != NULL)) {
+ parent = cur;
cur = cur->children;
continue;
}
@@ -887,7 +890,18 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) {
break;
case XML_ELEMENT_NODE:
- if ((cur != root) && (ctxt->format == 1) && (xmlIndentTreeOutput))
+ /*
+ * Some users like lxml are known to pass nodes with a corrupted
+ * tree structure. Fall back to a recursive call to handle this
+ * case.
+ */
+ if ((cur->parent != parent) && (cur->children != NULL)) {
+ xmlNodeDumpOutputInternal(ctxt, cur);
+ break;
+ }
+
+ if ((ctxt->level > 0) && (ctxt->format == 1) &&
+ (xmlIndentTreeOutput))
xmlOutputBufferWrite(buf, ctxt->indent_size *
(ctxt->level > ctxt->indent_nr ?
ctxt->indent_nr : ctxt->level),
@@ -942,6 +956,7 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) {
xmlOutputBufferWrite(buf, 1, ">");
if (ctxt->format == 1) xmlOutputBufferWrite(buf, 1, "\n");
if (ctxt->level >= 0) ctxt->level++;
+ parent = cur;
cur = cur->children;
continue;
}
@@ -1058,13 +1073,9 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) {
break;
}
- /*
- * The parent should never be NULL here but we want to handle
- * corrupted documents gracefully.
- */
- if (cur->parent == NULL)
- return;
- cur = cur->parent;
+ cur = parent;
+ /* cur->parent was validated when descending. */
+ parent = cur->parent;
if (cur->type == XML_ELEMENT_NODE) {
if (ctxt->level > 0) ctxt->level--;
--
GitLab

View File

@ -0,0 +1,46 @@
From 13ad8736d294536da4cbcd70a96b0a2fbf47070c Mon Sep 17 00:00:00 2001
From: Nick Wellnhofer <wellnhofer@aevum.de>
Date: Tue, 25 May 2021 10:55:25 +0200
Subject: [PATCH] Fix regression in xmlNodeDumpOutputInternal
Commit 85b1792e could cause additional whitespace if xmlNodeDump was
called with a non-zero starting level.
---
xmlsave.c | 14 +++++++-------
1 file changed, 7 insertions(+), 7 deletions(-)
diff --git a/xmlsave.c b/xmlsave.c
index aedbd5e7..489505f4 100644
--- xmlsave.c
+++ xmlsave.c
@@ -890,6 +890,13 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) {
break;
case XML_ELEMENT_NODE:
+ if ((cur != root) && (ctxt->format == 1) &&
+ (xmlIndentTreeOutput))
+ xmlOutputBufferWrite(buf, ctxt->indent_size *
+ (ctxt->level > ctxt->indent_nr ?
+ ctxt->indent_nr : ctxt->level),
+ ctxt->indent);
+
/*
* Some users like lxml are known to pass nodes with a corrupted
* tree structure. Fall back to a recursive call to handle this
@@ -900,13 +907,6 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) {
break;
}
- if ((ctxt->level > 0) && (ctxt->format == 1) &&
- (xmlIndentTreeOutput))
- xmlOutputBufferWrite(buf, ctxt->indent_size *
- (ctxt->level > ctxt->indent_nr ?
- ctxt->indent_nr : ctxt->level),
- ctxt->indent);
-
xmlOutputBufferWrite(buf, 1, "<");
if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
--
GitLab

View File

@ -0,0 +1,31 @@
From 3e1aad4fe584747fd7d17cc7b2863a78e2d21a77 Mon Sep 17 00:00:00 2001
From: Nick Wellnhofer <wellnhofer@aevum.de>
Date: Wed, 2 Jun 2021 17:31:49 +0200
Subject: [PATCH] Fix XPath recursion limit
Fix accounting of recursion depth when parsing XPath expressions.
This silly bug introduced in commit 804c5297 could lead to spurious
errors when parsing larger expressions or XSLT documents.
Should fix #264.
---
xpath.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/xpath.c b/xpath.c
index 7497ba07..1aa2f1ab 100644
--- xpath.c
+++ xpath.c
@@ -10983,7 +10983,7 @@ xmlXPathCompileExpr(xmlXPathParserContextPtr ctxt, int sort) {
}
if (xpctxt != NULL)
- xpctxt->depth -= 1;
+ xpctxt->depth -= 10;
}
/**
--
GitLab

View File

@ -0,0 +1,43 @@
From 92d9ab4c28842a09ca2b76d3ff2f933e01b6cd6f Mon Sep 17 00:00:00 2001
From: Nick Wellnhofer <wellnhofer@aevum.de>
Date: Mon, 7 Jun 2021 15:09:53 +0200
Subject: [PATCH] Fix whitespace when serializing empty HTML documents
The old, non-recursive HTML serialization code would always terminate
the output with a newline. The new implementation omitted the newline
if the document node had no children. Readd the newline when
serializing empty documents.
Fixes #266.
---
HTMLtree.c | 14 +++++++++-----
1 file changed, 9 insertions(+), 5 deletions(-)
diff --git a/HTMLtree.c b/HTMLtree.c
index bdd639c7..7a2b8558 100644
--- HTMLtree.c
+++ HTMLtree.c
@@ -763,11 +763,15 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
if (((xmlDocPtr) cur)->intSubset != NULL) {
htmlDtdDumpOutput(buf, (xmlDocPtr) cur, NULL);
}
- /* Always validate cur->parent when descending. */
- if ((cur->parent == parent) && (cur->children != NULL)) {
- parent = cur;
- cur = cur->children;
- continue;
+ if (cur->children != NULL) {
+ /* Always validate cur->parent when descending. */
+ if (cur->parent == parent) {
+ parent = cur;
+ cur = cur->children;
+ continue;
+ }
+ } else {
+ xmlOutputBufferWriteString(buf, "\n");
}
break;
--
GitLab

View File

@ -1,39 +0,0 @@
commit 106757e8c1e26ad9b8c924c7f304074b79e082c5
Author: Daniel Cheng <dcheng@google.com>
Date: Fri Apr 10 14:52:03 2020 -0700
Guard new calls to xmlValidatePopElement in xml_reader.c
Closes #154.
commit 386fb27654b93d9fb2880e03fb508d618a2e66f1
Author: Łukasz Wojniłowicz <lukasz.wojnilowicz@gmail.com>
Date: Tue Apr 28 17:00:37 2020 +0200
Add LIBXML_VALID_ENABLED to xmlreader
There are already LIBXML_VALID_ENABLED in this file to guard against
"--without-valid" at "./configure" step, but here they were missing.
diff --git xmlreader.c xmlreader.c
index 687c8b3c..3fd9aa4c 100644
--- xmlreader.c
+++ xmlreader.c
@@ -2260,14 +2260,18 @@ xmlFreeTextReader(xmlTextReaderPtr reader) {
if (reader->ctxt != NULL) {
if (reader->dict == reader->ctxt->dict)
reader->dict = NULL;
+#ifdef LIBXML_VALID_ENABLED
if ((reader->ctxt->vctxt.vstateTab != NULL) &&
(reader->ctxt->vctxt.vstateMax > 0)){
+#ifdef LIBXML_REGEXP_ENABLED
while (reader->ctxt->vctxt.vstateNr > 0)
xmlValidatePopElement(&reader->ctxt->vctxt, NULL, NULL, NULL);
+#endif /* LIBXML_REGEXP_ENABLED */
xmlFree(reader->ctxt->vctxt.vstateTab);
reader->ctxt->vctxt.vstateTab = NULL;
reader->ctxt->vctxt.vstateMax = 0;
}
+#endif /* LIBXML_VALID_ENABLED */
if (reader->ctxt->myDoc != NULL) {
if (reader->preserve == 0)
xmlTextReaderFreeDoc(reader, reader->ctxt->myDoc);