Add xmlCatalogResolveCache. This will improve performance in entity resolution by avoiding checking the same catalog several times. Fix https://gitlab.gnome.org/GNOME/libxml2/-/issues/1051
diff --git a/catalog.c b/catalog.c index ba9ee7a..21c91af 100644 --- a/catalog.c +++ b/catalog.c
@@ -191,6 +191,62 @@ */ static int xmlCatalogInitialized = 0; +/* + * Hash table to store any resolution query done to an XML_CATA_CATALOG + * entry to avoid cycles in resolution. + */ +static xmlHashTablePtr xmlCatalogResolveCache = NULL; + +/* + * xmlResetCatalogResolveCache + * Free the xmlCatalogResolveCache and set it to NULL + */ +static void +xmlResetCatalogResolveCache(void) +{ + xmlRMutexLock(&xmlCatalogMutex); + if (xmlCatalogResolveCache != NULL) { + xmlHashFree(xmlCatalogResolveCache, NULL); + xmlCatalogResolveCache = NULL; + } + xmlRMutexUnlock(&xmlCatalogMutex); +} + +/* + * xmlCatalogResolveCacheVisited + * Check if the url/pubID/sysID combination is present in the + * xmlCatalogResolveCache, and if not insert it. + * + * @param url The catalog URL + * @param pubID The pubID resolve field + * @param sysID The sysID resolve field + * @returns 1 if already present in the cache, 0 if not + */ +static int +xmlCatalogResolveCacheVisited(const xmlChar *url, + const xmlChar *pubID, + const xmlChar *sysID) +{ + int ret = 0; + xmlRMutexLock(&xmlCatalogMutex); + if (xmlCatalogResolveCache == NULL) { + xmlCatalogResolveCache = xmlHashCreate(10); + xmlHashAddEntry3(xmlCatalogResolveCache, url, pubID, sysID, BAD_CAST url); + xmlRMutexUnlock(&xmlCatalogMutex); + return 0; + } + + if (xmlHashLookup3(xmlCatalogResolveCache, url, pubID, sysID)) { + ret = 1; + } else { + xmlHashAddEntry3(xmlCatalogResolveCache, url, pubID, sysID, BAD_CAST url); + ret = 0; + } + + xmlRMutexUnlock(&xmlCatalogMutex); + return ret; +} + /************************************************************************ * * * Catalog error handlers * @@ -1162,6 +1218,7 @@ return(ret); } + /** * Examines an XML tree node of a catalog and build * a Catalog entry from it adding it to its parent. 
The examination can @@ -2019,6 +2076,11 @@ } while (catal != NULL) { if (catal->type == XML_CATA_CATALOG) { + if (xmlCatalogResolveCacheVisited(catal->URL, pubID, sysID)) { + if (xmlDebugCatalogs) xmlCatalogPrintDebug("Ignoring %s, already visited for %s, %s\n", catal->URL, pubID, sysID); + catal = catal->next; + continue; + } if (catal->children == NULL) { xmlFetchXMLCatalogFile(catal); } @@ -2087,6 +2149,12 @@ cur = catal; while (cur != NULL) { if (cur->type == XML_CATA_CATALOG) { + if (xmlCatalogResolveCacheVisited(cur->URL, URI, NULL)) { + if (xmlDebugCatalogs) xmlCatalogPrintDebug("Ignoring %s, already visited for %s\n", cur->URL, URI); + cur = cur->next; + continue; + } + if (cur->children == NULL) { xmlFetchXMLCatalogFile(cur); } @@ -2788,7 +2856,9 @@ } else #endif /* LIBXML_SGML_CATALOG_ENABLED */ { - ret = xmlCatalogListXMLResolve(catal->xml, NULL, sysID); + xmlResetCatalogResolveCache(); + ret = xmlCatalogListXMLResolve(catal->xml, NULL, sysID); + xmlResetCatalogResolveCache(); if (ret == XML_CATAL_BREAK) ret = NULL; } @@ -2826,7 +2896,9 @@ } else #endif /* LIBXML_SGML_CATALOG_ENABLED */ { - ret = xmlCatalogListXMLResolve(catal->xml, pubID, NULL); + xmlResetCatalogResolveCache(); + ret = xmlCatalogListXMLResolve(catal->xml, pubID, NULL); + xmlResetCatalogResolveCache(); if (ret == XML_CATAL_BREAK) ret = NULL; } @@ -2876,7 +2948,10 @@ } else #endif /* LIBXML_SGML_CATALOG_ENABLED */ { + xmlResetCatalogResolveCache(); ret = xmlCatalogListXMLResolve(catal->xml, pubID, sysID); + xmlResetCatalogResolveCache(); + if (ret == XML_CATAL_BREAK) ret = NULL; } @@ -3655,7 +3730,9 @@ catal = (xmlCatalogEntryPtr) catalogs; if (catal == NULL) return(NULL); + xmlResetCatalogResolveCache(); ret = xmlCatalogListXMLResolve(catal, pubID, sysID); + xmlResetCatalogResolveCache(); if ((ret != NULL) && (ret != XML_CATAL_BREAK)) return(ret); return(NULL); @@ -3726,7 +3803,9 @@ * Check first the XML catalogs */ if (xmlDefaultCatalog != NULL) { - ret = 
xmlCatalogListXMLResolve(xmlDefaultCatalog->xml, NULL, sysID); + xmlResetCatalogResolveCache(); + ret = xmlCatalogListXMLResolve(xmlDefaultCatalog->xml, NULL, sysID); + xmlResetCatalogResolveCache(); if ((ret != NULL) && (ret != XML_CATAL_BREAK)) { snprintf((char *) result, sizeof(result) - 1, "%s", (char *) ret); result[sizeof(result) - 1] = 0; @@ -3771,7 +3850,9 @@ * Check first the XML catalogs */ if (xmlDefaultCatalog != NULL) { - ret = xmlCatalogListXMLResolve(xmlDefaultCatalog->xml, pubID, NULL); + xmlResetCatalogResolveCache(); + ret = xmlCatalogListXMLResolve(xmlDefaultCatalog->xml, pubID, NULL); + xmlResetCatalogResolveCache(); if ((ret != NULL) && (ret != XML_CATAL_BREAK)) { snprintf((char *) result, sizeof(result) - 1, "%s", (char *) ret); result[sizeof(result) - 1] = 0;
diff --git a/test/catalogs/test.sh b/test/catalogs/test.sh index 9c5f5a8..7e5eaa7 100755 --- a/test/catalogs/test.sh +++ b/test/catalogs/test.sh
@@ -1,7 +1,5 @@ #!/bin/sh -set -e - echo "## Catalog regression tests" if [ -n "$1" ]; then