textproc/libxml2: sync with upstream

Taken from: HardenedBSD
2021-06-22 11:40:09 +02:00 · 2021-06-22 11:40:09 +02:00 · cdc8873fd3
parent c305dffbb0
commit cdc8873fd3
13 changed files with 379 additions and 332 deletions
--- a/textproc/libxml2/Makefile
+++ b/textproc/libxml2/Makefile
@@ -1,8 +1,8 @@
 # Created by: Yukihiro Nakai <Nakai@technologist.com>

 PORTNAME=	libxml2
-DISTVERSION=	2.9.10
-PORTREVISION?=	4
+DISTVERSION=	2.9.12
+#PORTREVISION?=	0
 CATEGORIES?=	textproc gnome
 MASTER_SITES=	http://xmlsoft.org/sources/
 DIST_SUBDIR=	gnome2
@@ -17,37 +17,42 @@ LICENSE_FILE_MIT=	${WRKSRC}/COPYING
 LICENSE_FILE_TRIO=	${FILESDIR}/LICENSE.TRIO
 LICENSE_PERMS_TRIO=	dist-mirror dist-sell pkg-mirror pkg-sell auto-accept

-GNU_CONFIGURE=	yes
 USES+=		compiler cpe iconv libtool pathfix pkgconfig shebangfix
+GNU_CONFIGURE=	yes
 CPE_VENDOR=	xmlsoft
+SHEBANG_FILES=	*.py */*.py */*/*.py
 USE_LDCONFIG=	yes
+
+INSTALL_TARGET=	install-strip
+TEST_TARGET=	check
+
 CONFIGURE_ARGS?=--with-iconv=${ICONV_PREFIX} \
 		--with-html-dir=${PREFIX}/share/doc \
 		--with-html-subdir=${PORTNAME} \
 		--without-icu \
 		--with-lzma=/usr \
 		--without-python
-INSTALL_TARGET=	install-strip
-SHEBANG_FILES=	*.py */*.py */*/*.py

-PLIST_SUB+=	LIBVERSION=${PORTVERSION}
+PLIST_SUB+=	LIBVERSION=${DISTVERSION}

 .if !defined(MASTERDIR)

-OPTIONS_DEFINE=	SCHEMA VALIDATION THREADS MEM_DEBUG XMLLINT_HIST THREAD_ALLOC
-OPTIONS_DEFAULT=SCHEMA VALIDATION THREADS
-SCHEMA_DESC=	XML schema support
-SCHEMA_CONFIGURE_WITH=	schemas
-VALIDATION_DESC=	Validation support
-VALIDATION_CONFIGURE_OFF=	--without-valid
-THREADS_DESC=	Threads support
-THREADS_CONFIGURE_WITH=	threads
-MEM_DEBUG_DESC=	Memory debugging (DEVELOPERS ONLY!)
-MEM_DEBUG_CONFIGURE_WITH=	mem-debug
-XMLLINT_HIST_DESC=	History for xmllint
-XMLLINT_HIST_CONFIGURE_WITH=	history
+OPTIONS_DEFINE=		MEM_DEBUG SCHEMA THREAD_ALLOC THREADS VALIDATION \
+			XMLLINT_HIST
+OPTIONS_DEFAULT=	SCHEMA VALIDATION THREADS
+
+MEM_DEBUG_DESC=		Memory debugging (DEVELOPERS ONLY!)
+SCHEMA_DESC=		XML schema support
 THREAD_ALLOC_DESC=	Per-thread memory (DEVELOPERS ONLY!)
+VALIDATION_DESC=	Validation support
+XMLLINT_HIST_DESC=	History for xmllint
+
+MEM_DEBUG_CONFIGURE_WITH=	mem-debug
+SCHEMA_CONFIGURE_WITH=		schemas
 THREAD_ALLOC_CONFIGURE_WITH=	thread-alloc
+THREADS_CONFIGURE_WITH=		threads
+VALIDATION_CONFIGURE_OFF=	--without-valid
+XMLLINT_HIST_CONFIGURE_WITH=	history

 .endif # !defined(MASTERDIR)

--- a/textproc/libxml2/distinfo
+++ b/textproc/libxml2/distinfo
@@ -1,11 +1,3 @@
-TIMESTAMP = 1602549798
-SHA256 (gnome2/libxml2-2.9.10.tar.gz) = aafee193ffb8fe0c82d4afef6ef91972cbaf5feea100edc2f262750611b4be1f
-SIZE (gnome2/libxml2-2.9.10.tar.gz) = 5624761
-SHA256 (gnome2/7ffcd44d7e6c46704f8af0321d9314cd26e0e18a.patch) = 8bab1a7fcc22a8f9a3f89648660bbca424196d82967e213bd27c1dcc9a9544a5
-SIZE (gnome2/7ffcd44d7e6c46704f8af0321d9314cd26e0e18a.patch) = 1015
-SHA256 (gnome2/0e1a49c8907645d2e155f0d89d4d9895ac5112b5.patch) = 4a1dca36e762a0e2affb0779918fbf1665a00d984ffbd3efa45d3d202f87ea8c
-SIZE (gnome2/0e1a49c8907645d2e155f0d89d4d9895ac5112b5.patch) = 996
-SHA256 (gnome2/50f06b3efb638efb0abd95dc62dca05ae67882c2.patch) = 701048e726e2f3f7f2a71a7054030fc154b5edace72e23c5934ecd9ee09ad811
-SIZE (gnome2/50f06b3efb638efb0abd95dc62dca05ae67882c2.patch) = 1052
-SHA256 (gnome2/edc7b6abb0c125eeb888748c334897f60aab0854.patch) = eac708cc0bcb19c59c63874e5518f9084b177c8a10981539d90ba41d9e8414a1
-SIZE (gnome2/edc7b6abb0c125eeb888748c334897f60aab0854.patch) = 3019
+TIMESTAMP = 1622963062
+SHA256 (gnome2/libxml2-2.9.12.tar.gz) = c8d6681e38c56f172892c85ddc0852e1fd4b53b4209e7f4ebf17f7e2eae71d92
+SIZE (gnome2/libxml2-2.9.12.tar.gz) = 5681632
--- a/textproc/libxml2/files/patch-CVE-2019-20388
+++ b/textproc/libxml2/files/patch-CVE-2019-20388
@@ -1,33 +0,0 @@
-From 7ffcd44d7e6c46704f8af0321d9314cd26e0e18a Mon Sep 17 00:00:00 2001
-From: Zhipeng Xie <xiezhipeng1@huawei.com>
-Date: Tue, 20 Aug 2019 16:33:06 +0800
-Subject: [PATCH] Fix memory leak in xmlSchemaValidateStream
-
-When ctxt->schema is NULL, xmlSchemaSAXPlug->xmlSchemaPreRun
-alloc a new schema for ctxt->schema and set vctxt->xsiAssemble
-to 1. Then xmlSchemaVStart->xmlSchemaPreRun initialize
-vctxt->xsiAssemble to 0 again which cause the alloced schema
-can not be freed anymore.
-
-Found with libFuzzer.
-
-Signed-off-by: Zhipeng Xie <xiezhipeng1@huawei.com>
----
- xmlschemas.c | 1 -
- 1 file changed, 1 deletion(-)
-
-diff --git a/xmlschemas.c b/xmlschemas.c
-index 301c8449..39d92182 100644
---- xmlschemas.c
-+++ xmlschemas.c
-@@ -28090,7 +28090,6 @@ xmlSchemaPreRun(xmlSchemaValidCtxtPtr vctxt) {
-     vctxt->nberrors = 0;
-     vctxt->depth = -1;
-     vctxt->skipDepth = -1;
--    vctxt->xsiAssemble = 0;
-     vctxt->hasKeyrefs = 0;
- #ifdef ENABLE_IDC_NODE_TABLES_TEST
-     vctxt->createIDCNodeTables = 1;
--- 
-GitLab
-
--- a/textproc/libxml2/files/patch-CVE-2020-24977
+++ b/textproc/libxml2/files/patch-CVE-2020-24977
@@ -1,36 +0,0 @@
-From 50f06b3efb638efb0abd95dc62dca05ae67882c2 Mon Sep 17 00:00:00 2001
-From: Nick Wellnhofer <wellnhofer@aevum.de>
-Date: Fri, 7 Aug 2020 21:54:27 +0200
-Subject: [PATCH] Fix out-of-bounds read with 'xmllint --htmlout'
-
-Make sure that truncated UTF-8 sequences don't cause an out-of-bounds
-array access.
-
-Thanks to @SuhwanSong and the Agency for Defense Development (ADD) for
-the report.
-
-Fixes #178.
----
- xmllint.c | 6 ++++++
- 1 file changed, 6 insertions(+)
-
-diff --git a/xmllint.c b/xmllint.c
-index f6a8e463..c647486f 100644
---- xmllint.c
-+++ xmllint.c
-@@ -528,6 +528,12 @@ static void
- xmlHTMLEncodeSend(void) {
-     char *result;
- 
-+    /*
-+     * xmlEncodeEntitiesReentrant assumes valid UTF-8, but the buffer might
-+     * end with a truncated UTF-8 sequence. This is a hack to at least avoid
-+     * an out-of-bounds read.
-+     */
-+    memset(&buffer[sizeof(buffer)-4], 0, 4);
-     result = (char *) xmlEncodeEntitiesReentrant(NULL, BAD_CAST buffer);
-     if (result) {
- 	xmlGenericError(xmlGenericErrorContext, "%s", result);
--- 
-GitLab
-
--- a/textproc/libxml2/files/patch-CVE-2020-7595
+++ b/textproc/libxml2/files/patch-CVE-2020-7595
@@ -1,32 +0,0 @@
-From 0e1a49c8907645d2e155f0d89d4d9895ac5112b5 Mon Sep 17 00:00:00 2001
-From: Zhipeng Xie <xiezhipeng1@huawei.com>
-Date: Thu, 12 Dec 2019 17:30:55 +0800
-Subject: [PATCH] Fix infinite loop in xmlStringLenDecodeEntities
-
-When ctxt->instate == XML_PARSER_EOF,xmlParseStringEntityRef
-return NULL which cause a infinite loop in xmlStringLenDecodeEntities
-
-Found with libFuzzer.
-
-Signed-off-by: Zhipeng Xie <xiezhipeng1@huawei.com>
----
- parser.c | 3 ++-
- 1 file changed, 2 insertions(+), 1 deletion(-)
-
-diff --git a/parser.c b/parser.c
-index d1c31963..a34bb6cd 100644
---- parser.c
-+++ parser.c
-@@ -2646,7 +2646,8 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
-     else
-         c = 0;
-     while ((c != 0) && (c != end) && /* non input consuming loop */
--	   (c != end2) && (c != end3)) {
-+           (c != end2) && (c != end3) &&
-+           (ctxt->instate != XML_PARSER_EOF)) {
- 
- 	if (c == 0) break;
-         if ((c == '&') && (str[1] == '#')) {
--- 
-GitLab
-
--- a/textproc/libxml2/files/patch-CVE-2021-3541
+++ b/textproc/libxml2/files/patch-CVE-2021-3541
@@ -1,67 +0,0 @@
-From 8598060bacada41a0eb09d95c97744ff4e428f8e Mon Sep 17 00:00:00 2001
-From: Daniel Veillard <veillard@redhat.com>
-Date: Thu, 13 May 2021 14:55:12 +0200
-Subject: [PATCH] Patch for security issue CVE-2021-3541
-
-This is relapted to parameter entities expansion and following
-the line of the billion laugh attack. Somehow in that path the
-counting of parameters was missed and the normal algorithm based
-on entities "density" was useless.
----
- parser.c | 26 ++++++++++++++++++++++++++
- 1 file changed, 26 insertions(+)
-
-diff --git parser.c parser.c
-index f5e5e169..c9312fa4 100644
---- parser.c
-+++ parser.c
-@@ -140,6 +140,7 @@ xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
-                      xmlEntityPtr ent, size_t replacement)
- {
-     size_t consumed = 0;
-+    int i;
- 
-     if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
-         return (0);
-@@ -177,6 +178,28 @@ xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
- 	    rep = NULL;
- 	}
-     }
-+
-+    /*
-+     * Prevent entity exponential check, not just replacement while
-+     * parsing the DTD
-+     * The check is potentially costly so do that only once in a thousand
-+     */
-+    if ((ctxt->instate == XML_PARSER_DTD) && (ctxt->nbentities > 10000) &&
-+        (ctxt->nbentities % 1024 == 0)) {
-+	for (i = 0;i < ctxt->inputNr;i++) {
-+	    consumed += ctxt->inputTab[i]->consumed +
-+	               (ctxt->inputTab[i]->cur - ctxt->inputTab[i]->base);
-+	}
-+	if (ctxt->nbentities > consumed * XML_PARSER_NON_LINEAR) {
-+	    xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
-+	    ctxt->instate = XML_PARSER_EOF;
-+	    return (1);
-+	}
-+	consumed = 0;
-+    }
-+
-+
-+
-     if (replacement != 0) {
- 	if (replacement < XML_MAX_TEXT_LENGTH)
- 	    return(0);
-@@ -7963,6 +7986,9 @@ xmlParsePEReference(xmlParserCtxtPtr ctxt)
-             xmlChar start[4];
-             xmlCharEncoding enc;
- 
-+	    if (xmlParserEntityCheck(ctxt, 0, entity, 0))
-+	        return;
-+
- 	    if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
- 	        ((ctxt->options & XML_PARSE_NOENT) == 0) &&
- 		((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
--- 
-2.31.1
-
--- a/textproc/libxml2/files/patch-Makefile.in
+++ b/textproc/libxml2/files/patch-Makefile.in
@@ -1,6 +1,24 @@
---- Makefile.in.orig	2019-11-16 14:42:34 UTC
+--- Makefile.in.orig	2021-06-09 19:53:33 UTC
 +++ Makefile.in
-@@ -1284,7 +1284,7 @@ am--depfiles: $(am__depfiles_remade)
+@@ -760,7 +760,7 @@ man_MANS = xml2-config.1 libxml.3
+ m4datadir = $(datadir)/aclocal
+ m4data_DATA = libxml.m4
+ runtest_SOURCES = runtest.c
+-runtest_LDFLAGS = 
++runtest_LDFLAGS = -pthread
+ runtest_DEPENDENCIES = $(DEPS)
+ runtest_LDADD = $(BASE_THREAD_LIBS) $(RDL_LIBS) $(LDADDS)
+ testrecurse_SOURCES = testrecurse.c
+@@ -808,7 +808,7 @@ testC14N_LDFLAGS = 
+ testC14N_DEPENDENCIES = $(DEPS)
+ testC14N_LDADD = $(LDADDS)
+ testThreads_SOURCES = testThreads.c
+-testThreads_LDFLAGS = 
++testThreads_LDFLAGS = -pthread
+ testThreads_DEPENDENCIES = $(DEPS)
+ testThreads_LDADD = $(BASE_THREAD_LIBS) $(LDADDS)
+ testURI_SOURCES = testURI.c
+@@ -1285,7 +1285,7 @@ am--depfiles: $(am__depfiles_remade)
 @am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
 
 .c.lo:
@@ -9,7 +27,7 @@
 @am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
 @AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
 @AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@@ -1773,7 +1773,7 @@ check-am: all-am
+@@ -1774,7 +1774,7 @@ check-am: all-am
 	$(MAKE) $(AM_MAKEFLAGS) $(check_PROGRAMS)
 check: $(BUILT_SOURCES)
 	$(MAKE) $(AM_MAKEFLAGS) check-recursive
@@ -18,7 +36,7 @@
 		config.h
 install-binPROGRAMS: install-libLTLIBRARIES
 
-@@ -1910,7 +1910,7 @@ info: info-recursive
+@@ -1911,7 +1911,7 @@ info: info-recursive
 
 info-am:
 
--- a/textproc/libxml2/files/patch-Python-39-support
+++ b/textproc/libxml2/files/patch-Python-39-support
@@ -1,92 +0,0 @@
-From edc7b6abb0c125eeb888748c334897f60aab0854 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Miro=20Hron=C4=8Dok?= <miro@hroncok.cz>
-Date: Fri, 28 Feb 2020 12:48:14 +0100
-Subject: [PATCH] Parenthesize Py<type>_Check() in ifs
-
-In C, if expressions should be parenthesized.
-PyLong_Check, PyUnicode_Check etc. happened to expand to a parenthesized
-expression before, but that's not API to rely on.
-
-Since Python 3.9.0a4 it needs to be parenthesized explicitly.
-
-Fixes https://gitlab.gnome.org/GNOME/libxml2/issues/149
----
- python/libxml.c |  4 ++--
- python/types.c  | 12 ++++++------
- 2 files changed, 8 insertions(+), 8 deletions(-)
-
-diff --git a/python/libxml.c b/python/libxml.c
-index bc676c4e..81e709f3 100644
---- python/libxml.c
-+++ python/libxml.c
-@@ -294,7 +294,7 @@ xmlPythonFileReadRaw (void * context, char * buffer, int len) {
- 	lenread = PyBytes_Size(ret);
- 	data = PyBytes_AsString(ret);
- #ifdef PyUnicode_Check
--    } else if PyUnicode_Check (ret) {
-+    } else if (PyUnicode_Check (ret)) {
- #if PY_VERSION_HEX >= 0x03030000
-         Py_ssize_t size;
- 	const char *tmp;
-@@ -359,7 +359,7 @@ xmlPythonFileRead (void * context, char * buffer, int len) {
- 	lenread = PyBytes_Size(ret);
- 	data = PyBytes_AsString(ret);
- #ifdef PyUnicode_Check
--    } else if PyUnicode_Check (ret) {
-+    } else if (PyUnicode_Check (ret)) {
- #if PY_VERSION_HEX >= 0x03030000
-         Py_ssize_t size;
- 	const char *tmp;
-diff --git a/python/types.c b/python/types.c
-index c2bafeb1..ed284ec7 100644
---- python/types.c
-+++ python/types.c
-@@ -602,16 +602,16 @@ libxml_xmlXPathObjectPtrConvert(PyObject *obj)
-     if (obj == NULL) {
-         return (NULL);
-     }
--    if PyFloat_Check (obj) {
-+    if (PyFloat_Check (obj)) {
-         ret = xmlXPathNewFloat((double) PyFloat_AS_DOUBLE(obj));
--    } else if PyLong_Check(obj) {
-+    } else if (PyLong_Check(obj)) {
- #ifdef PyLong_AS_LONG
-         ret = xmlXPathNewFloat((double) PyLong_AS_LONG(obj));
- #else
-         ret = xmlXPathNewFloat((double) PyInt_AS_LONG(obj));
- #endif
- #ifdef PyBool_Check
--    } else if PyBool_Check (obj) {
-+    } else if (PyBool_Check (obj)) {
- 
-         if (obj == Py_True) {
-           ret = xmlXPathNewBoolean(1);
-@@ -620,14 +620,14 @@ libxml_xmlXPathObjectPtrConvert(PyObject *obj)
-           ret = xmlXPathNewBoolean(0);
-         }
- #endif
--    } else if PyBytes_Check (obj) {
-+    } else if (PyBytes_Check (obj)) {
-         xmlChar *str;
- 
-         str = xmlStrndup((const xmlChar *) PyBytes_AS_STRING(obj),
-                          PyBytes_GET_SIZE(obj));
-         ret = xmlXPathWrapString(str);
- #ifdef PyUnicode_Check
--    } else if PyUnicode_Check (obj) {
-+    } else if (PyUnicode_Check (obj)) {
- #if PY_VERSION_HEX >= 0x03030000
-         xmlChar *str;
- 	const char *tmp;
-@@ -650,7 +650,7 @@ libxml_xmlXPathObjectPtrConvert(PyObject *obj)
- 	ret = xmlXPathWrapString(str);
- #endif
- #endif
--    } else if PyList_Check (obj) {
-+    } else if (PyList_Check (obj)) {
-         int i;
-         PyObject *node;
-         xmlNodePtr cur;
--- 
-GitLab
-
--- a/textproc/libxml2/files/patch-git-01-85b1792e37b131e7a51af98a37f92472e8de5f3f
+++ b/textproc/libxml2/files/patch-git-01-85b1792e37b131e7a51af98a37f92472e8de5f3f
@@ -0,0 +1,211 @@
+From 85b1792e37b131e7a51af98a37f92472e8de5f3f Mon Sep 17 00:00:00 2001
+From: Nick Wellnhofer <wellnhofer@aevum.de>
+Date: Tue, 18 May 2021 20:08:28 +0200
+Subject: [PATCH] Work around lxml API abuse
+
+Make xmlNodeDumpOutput and htmlNodeDumpFormatOutput work with corrupted
+parent pointers. This used to work with the old recursive code but the
+non-recursive rewrite required parent pointers to be set correctly.
+
+Unfortunately, lxml relies on the old behavior and passes subtrees with
+a corrupted structure. Fall back to a recursive function call if an
+invalid parent pointer is detected.
+
+Fixes #255.
+---
+ HTMLtree.c | 46 ++++++++++++++++++++++++++++------------------
+ xmlsave.c  | 31 +++++++++++++++++++++----------
+ 2 files changed, 49 insertions(+), 28 deletions(-)
+
+diff --git a/HTMLtree.c b/HTMLtree.c
+index 24434d45..bdd639c7 100644
+--- HTMLtree.c
++++ HTMLtree.c
+@@ -744,7 +744,7 @@ void
+ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
+ 	                 xmlNodePtr cur, const char *encoding ATTRIBUTE_UNUSED,
+                          int format) {
+-    xmlNodePtr root;
++    xmlNodePtr root, parent;
+     xmlAttrPtr attr;
+     const htmlElemDesc * info;
+ 
+@@ -755,6 +755,7 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
+     }
+ 
+     root = cur;
++    parent = cur->parent;
+     while (1) {
+         switch (cur->type) {
+         case XML_HTML_DOCUMENT_NODE:
+@@ -762,13 +763,25 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
+             if (((xmlDocPtr) cur)->intSubset != NULL) {
+                 htmlDtdDumpOutput(buf, (xmlDocPtr) cur, NULL);
+             }
+-            if (cur->children != NULL) {
++            /* Always validate cur->parent when descending. */
++            if ((cur->parent == parent) && (cur->children != NULL)) {
++                parent = cur;
+                 cur = cur->children;
+                 continue;
+             }
+             break;
+ 
+         case XML_ELEMENT_NODE:
++            /*
++             * Some users like lxml are known to pass nodes with a corrupted
++             * tree structure. Fall back to a recursive call to handle this
++             * case.
++             */
++            if ((cur->parent != parent) && (cur->children != NULL)) {
++                htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
++                break;
++            }
++
+             /*
+              * Get specific HTML info for that node.
+              */
+@@ -817,6 +830,7 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
+                     (cur->name != NULL) &&
+                     (cur->name[0] != 'p')) /* p, pre, param */
+                     xmlOutputBufferWriteString(buf, "\n");
++                parent = cur;
+                 cur = cur->children;
+                 continue;
+             }
+@@ -825,9 +839,9 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
+                 (info != NULL) && (!info->isinline)) {
+                 if ((cur->next->type != HTML_TEXT_NODE) &&
+                     (cur->next->type != HTML_ENTITY_REF_NODE) &&
+-                    (cur->parent != NULL) &&
+-                    (cur->parent->name != NULL) &&
+-                    (cur->parent->name[0] != 'p')) /* p, pre, param */
++                    (parent != NULL) &&
++                    (parent->name != NULL) &&
++                    (parent->name[0] != 'p')) /* p, pre, param */
+                     xmlOutputBufferWriteString(buf, "\n");
+             }
+ 
+@@ -842,9 +856,9 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
+                 break;
+             if (((cur->name == (const xmlChar *)xmlStringText) ||
+                  (cur->name != (const xmlChar *)xmlStringTextNoenc)) &&
+-                ((cur->parent == NULL) ||
+-                 ((xmlStrcasecmp(cur->parent->name, BAD_CAST "script")) &&
+-                  (xmlStrcasecmp(cur->parent->name, BAD_CAST "style"))))) {
++                ((parent == NULL) ||
++                 ((xmlStrcasecmp(parent->name, BAD_CAST "script")) &&
++                  (xmlStrcasecmp(parent->name, BAD_CAST "style"))))) {
+                 xmlChar *buffer;
+ 
+                 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
+@@ -902,13 +916,9 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
+                 break;
+             }
+ 
+-            /*
+-             * The parent should never be NULL here but we want to handle
+-             * corrupted documents gracefully.
+-             */
+-            if (cur->parent == NULL)
+-                return;
+-            cur = cur->parent;
++            cur = parent;
++            /* cur->parent was validated when descending. */
++            parent = cur->parent;
+ 
+             if ((cur->type == XML_HTML_DOCUMENT_NODE) ||
+                 (cur->type == XML_DOCUMENT_NODE)) {
+@@ -939,9 +949,9 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
+                     (cur->next != NULL)) {
+                     if ((cur->next->type != HTML_TEXT_NODE) &&
+                         (cur->next->type != HTML_ENTITY_REF_NODE) &&
+-                        (cur->parent != NULL) &&
+-                        (cur->parent->name != NULL) &&
+-                        (cur->parent->name[0] != 'p')) /* p, pre, param */
++                        (parent != NULL) &&
++                        (parent->name != NULL) &&
++                        (parent->name[0] != 'p')) /* p, pre, param */
+                         xmlOutputBufferWriteString(buf, "\n");
+                 }
+             }
+diff --git a/xmlsave.c b/xmlsave.c
+index 61a40459..aedbd5e7 100644
+--- xmlsave.c
++++ xmlsave.c
+@@ -847,7 +847,7 @@ htmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) {
+ static void
+ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) {
+     int format = ctxt->format;
+-    xmlNodePtr tmp, root, unformattedNode = NULL;
++    xmlNodePtr tmp, root, unformattedNode = NULL, parent;
+     xmlAttrPtr attr;
+     xmlChar *start, *end;
+     xmlOutputBufferPtr buf;
+@@ -856,6 +856,7 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) {
+     buf = ctxt->buf;
+ 
+     root = cur;
++    parent = cur->parent;
+     while (1) {
+         switch (cur->type) {
+         case XML_DOCUMENT_NODE:
+@@ -868,7 +869,9 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) {
+             break;
+ 
+         case XML_DOCUMENT_FRAG_NODE:
+-            if (cur->children != NULL) {
++            /* Always validate cur->parent when descending. */
++            if ((cur->parent == parent) && (cur->children != NULL)) {
++                parent = cur;
+                 cur = cur->children;
+                 continue;
+             }
+@@ -887,7 +890,18 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) {
+             break;
+ 
+         case XML_ELEMENT_NODE:
+-	    if ((cur != root) && (ctxt->format == 1) && (xmlIndentTreeOutput))
++            /*
++             * Some users like lxml are known to pass nodes with a corrupted
++             * tree structure. Fall back to a recursive call to handle this
++             * case.
++             */
++            if ((cur->parent != parent) && (cur->children != NULL)) {
++                xmlNodeDumpOutputInternal(ctxt, cur);
++                break;
++            }
++
++	    if ((ctxt->level > 0) && (ctxt->format == 1) &&
++                (xmlIndentTreeOutput))
+ 		xmlOutputBufferWrite(buf, ctxt->indent_size *
+ 				     (ctxt->level > ctxt->indent_nr ?
+ 				      ctxt->indent_nr : ctxt->level),
+@@ -942,6 +956,7 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) {
+                 xmlOutputBufferWrite(buf, 1, ">");
+                 if (ctxt->format == 1) xmlOutputBufferWrite(buf, 1, "\n");
+                 if (ctxt->level >= 0) ctxt->level++;
++                parent = cur;
+                 cur = cur->children;
+                 continue;
+             }
+@@ -1058,13 +1073,9 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) {
+                 break;
+             }
+ 
+-            /*
+-             * The parent should never be NULL here but we want to handle
+-             * corrupted documents gracefully.
+-             */
+-            if (cur->parent == NULL)
+-                return;
+-            cur = cur->parent;
++            cur = parent;
++            /* cur->parent was validated when descending. */
++            parent = cur->parent;
+ 
+             if (cur->type == XML_ELEMENT_NODE) {
+                 if (ctxt->level > 0) ctxt->level--;
+-- 
+GitLab
+
--- a/textproc/libxml2/files/patch-git-02-13ad8736d294536da4cbcd70a96b0a2fbf47070c
+++ b/textproc/libxml2/files/patch-git-02-13ad8736d294536da4cbcd70a96b0a2fbf47070c
@@ -0,0 +1,46 @@
+From 13ad8736d294536da4cbcd70a96b0a2fbf47070c Mon Sep 17 00:00:00 2001
+From: Nick Wellnhofer <wellnhofer@aevum.de>
+Date: Tue, 25 May 2021 10:55:25 +0200
+Subject: [PATCH] Fix regression in xmlNodeDumpOutputInternal
+
+Commit 85b1792e could cause additional whitespace if xmlNodeDump was
+called with a non-zero starting level.
+---
+ xmlsave.c | 14 +++++++-------
+ 1 file changed, 7 insertions(+), 7 deletions(-)
+
+diff --git a/xmlsave.c b/xmlsave.c
+index aedbd5e7..489505f4 100644
+--- xmlsave.c
++++ xmlsave.c
+@@ -890,6 +890,13 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) {
+             break;
+ 
+         case XML_ELEMENT_NODE:
++	    if ((cur != root) && (ctxt->format == 1) &&
++                (xmlIndentTreeOutput))
++		xmlOutputBufferWrite(buf, ctxt->indent_size *
++				     (ctxt->level > ctxt->indent_nr ?
++				      ctxt->indent_nr : ctxt->level),
++				     ctxt->indent);
++
+             /*
+              * Some users like lxml are known to pass nodes with a corrupted
+              * tree structure. Fall back to a recursive call to handle this
+@@ -900,13 +907,6 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) {
+                 break;
+             }
+ 
+-	    if ((ctxt->level > 0) && (ctxt->format == 1) &&
+-                (xmlIndentTreeOutput))
+-		xmlOutputBufferWrite(buf, ctxt->indent_size *
+-				     (ctxt->level > ctxt->indent_nr ?
+-				      ctxt->indent_nr : ctxt->level),
+-				     ctxt->indent);
+-
+             xmlOutputBufferWrite(buf, 1, "<");
+             if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
+                 xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
+-- 
+GitLab
+
--- a/textproc/libxml2/files/patch-git-03-3e1aad4fe584747fd7d17cc7b2863a78e2d21a77
+++ b/textproc/libxml2/files/patch-git-03-3e1aad4fe584747fd7d17cc7b2863a78e2d21a77
@@ -0,0 +1,31 @@
+From 3e1aad4fe584747fd7d17cc7b2863a78e2d21a77 Mon Sep 17 00:00:00 2001
+From: Nick Wellnhofer <wellnhofer@aevum.de>
+Date: Wed, 2 Jun 2021 17:31:49 +0200
+Subject: [PATCH] Fix XPath recursion limit
+
+Fix accounting of recursion depth when parsing XPath expressions.
+
+This silly bug introduced in commit 804c5297 could lead to spurious
+errors when parsing larger expressions or XSLT documents.
+
+Should fix #264.
+---
+ xpath.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/xpath.c b/xpath.c
+index 7497ba07..1aa2f1ab 100644
+--- xpath.c
++++ xpath.c
+@@ -10983,7 +10983,7 @@ xmlXPathCompileExpr(xmlXPathParserContextPtr ctxt, int sort) {
+     }
+ 
+     if (xpctxt != NULL)
+-        xpctxt->depth -= 1;
++        xpctxt->depth -= 10;
+ }
+ 
+ /**
+-- 
+GitLab
+
--- a/textproc/libxml2/files/patch-git-04-92d9ab4c28842a09ca2b76d3ff2f933e01b6cd6f
+++ b/textproc/libxml2/files/patch-git-04-92d9ab4c28842a09ca2b76d3ff2f933e01b6cd6f
@@ -0,0 +1,43 @@
+From 92d9ab4c28842a09ca2b76d3ff2f933e01b6cd6f Mon Sep 17 00:00:00 2001
+From: Nick Wellnhofer <wellnhofer@aevum.de>
+Date: Mon, 7 Jun 2021 15:09:53 +0200
+Subject: [PATCH] Fix whitespace when serializing empty HTML documents
+
+The old, non-recursive HTML serialization code would always terminate
+the output with a newline. The new implementation omitted the newline
+if the document node had no children. Readd the newline when
+serializing empty documents.
+
+Fixes #266.
+---
+ HTMLtree.c | 14 +++++++++-----
+ 1 file changed, 9 insertions(+), 5 deletions(-)
+
+diff --git a/HTMLtree.c b/HTMLtree.c
+index bdd639c7..7a2b8558 100644
+--- HTMLtree.c
++++ HTMLtree.c
+@@ -763,11 +763,15 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
+             if (((xmlDocPtr) cur)->intSubset != NULL) {
+                 htmlDtdDumpOutput(buf, (xmlDocPtr) cur, NULL);
+             }
+-            /* Always validate cur->parent when descending. */
+-            if ((cur->parent == parent) && (cur->children != NULL)) {
+-                parent = cur;
+-                cur = cur->children;
+-                continue;
++            if (cur->children != NULL) {
++                /* Always validate cur->parent when descending. */
++                if (cur->parent == parent) {
++                    parent = cur;
++                    cur = cur->children;
++                    continue;
++                }
++            } else {
++                xmlOutputBufferWriteString(buf, "\n");
+             }
+             break;
+ 
+-- 
+GitLab
+
--- a/textproc/libxml2/files/patch-git-106757e8c1e26ad9b8c924c7f304074b79e082c5
+++ b/textproc/libxml2/files/patch-git-106757e8c1e26ad9b8c924c7f304074b79e082c5
@@ -1,39 +0,0 @@
-commit 106757e8c1e26ad9b8c924c7f304074b79e082c5
-Author: Daniel Cheng <dcheng@google.com>
-Date:   Fri Apr 10 14:52:03 2020 -0700
-
-    Guard new calls to xmlValidatePopElement in xml_reader.c
-    
-    Closes #154.
-
-commit 386fb27654b93d9fb2880e03fb508d618a2e66f1
-Author: Łukasz Wojniłowicz <lukasz.wojnilowicz@gmail.com>
-Date:   Tue Apr 28 17:00:37 2020 +0200
-
-    Add LIBXML_VALID_ENABLED to xmlreader
-    
-    There are already LIBXML_VALID_ENABLED in this file to guard against
-    "--without-valid" at "./configure" step, but here they were missing.
-diff --git xmlreader.c xmlreader.c
-index 687c8b3c..3fd9aa4c 100644
---- xmlreader.c
-+++ xmlreader.c
-@@ -2260,14 +2260,18 @@ xmlFreeTextReader(xmlTextReaderPtr reader) {
-     if (reader->ctxt != NULL) {
-         if (reader->dict == reader->ctxt->dict)
- 	    reader->dict = NULL;
-+#ifdef LIBXML_VALID_ENABLED
- 	if ((reader->ctxt->vctxt.vstateTab != NULL) &&
- 	    (reader->ctxt->vctxt.vstateMax > 0)){
-+#ifdef LIBXML_REGEXP_ENABLED
-             while (reader->ctxt->vctxt.vstateNr > 0)
-                 xmlValidatePopElement(&reader->ctxt->vctxt, NULL, NULL, NULL);
-+#endif /* LIBXML_REGEXP_ENABLED */
- 	    xmlFree(reader->ctxt->vctxt.vstateTab);
- 	    reader->ctxt->vctxt.vstateTab = NULL;
- 	    reader->ctxt->vctxt.vstateMax = 0;
- 	}
-+#endif /* LIBXML_VALID_ENABLED */
- 	if (reader->ctxt->myDoc != NULL) {
- 	    if (reader->preserve == 0)
- 		xmlTextReaderFreeDoc(reader, reader->ctxt->myDoc);