From bc58e61b03d5799df977b3d6a4865e3b6f380cf9 Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Wed, 27 May 2026 15:43:03 +0200 Subject: [PATCH 01/11] feat: Add Hadoop 3.5.0 --- .scripts/upload_new_boost_version.sh | 59 ++++ hadoop/boil-config.toml | 22 +- hadoop/hadoop/Dockerfile | 30 +- hadoop/hadoop/boil-config.toml | 25 +- .../0001-YARN-11527-Update-node.js.patch | 22 ++ ...ding-datanode-registration-addresses.patch | 259 ++++++++++++++++++ ...ync-profiler-also-grab-itimer-events.patch | 29 ++ ...issing-operationType-for-some-operat.patch | 199 ++++++++++++++ .../3.4.3/0005-Update-CycloneDX-plugin.patch | 44 +++ ...adoop-OSS-Connector-adds-support-for.patch | 245 +++++++++++++++++ ...N-11873-Update-nodejs-to-LTS-version.patch | 22 ++ ...or-hostname-resolution-bug-with-data.patch | 48 ++++ .../stackable/patches/3.4.3/patchable.toml | 2 + .../0001-YARN-11527-Update-node.js.patch | 22 ++ ...ding-datanode-registration-addresses.patch | 259 ++++++++++++++++++ ...ync-profiler-also-grab-itimer-events.patch | 29 ++ .../3.5.0/0004-Update-CycloneDX-plugin.patch | 35 +++ ...N-11873-Update-nodejs-to-LTS-version.patch | 22 ++ ...or-hostname-resolution-bug-with-data.patch | 48 ++++ .../stackable/patches/3.5.0/patchable.toml | 2 + .../stackable/patches/0.6.0/patchable.toml | 2 + rust/patchable/README.md | 2 +- shared/boost/Dockerfile | 57 ++++ shared/boost/boil-config.toml | 17 ++ .../stackable/patches/3.21.12/patchable.toml | 2 + .../stackable/patches/3.25.5/patchable.toml | 2 + 26 files changed, 1492 insertions(+), 13 deletions(-) create mode 100755 .scripts/upload_new_boost_version.sh create mode 100644 hadoop/hadoop/stackable/patches/3.4.3/0001-YARN-11527-Update-node.js.patch create mode 100644 hadoop/hadoop/stackable/patches/3.4.3/0002-Allow-overriding-datanode-registration-addresses.patch create mode 100644 hadoop/hadoop/stackable/patches/3.4.3/0003-Async-profiler-also-grab-itimer-events.patch create mode 100644 
hadoop/hadoop/stackable/patches/3.4.3/0004-HDFS-17378-Fix-missing-operationType-for-some-operat.patch create mode 100644 hadoop/hadoop/stackable/patches/3.4.3/0005-Update-CycloneDX-plugin.patch create mode 100644 hadoop/hadoop/stackable/patches/3.4.3/0006-HADOOP-19352.-Hadoop-OSS-Connector-adds-support-for.patch create mode 100644 hadoop/hadoop/stackable/patches/3.4.3/0007-YARN-11873-Update-nodejs-to-LTS-version.patch create mode 100644 hadoop/hadoop/stackable/patches/3.4.3/0008-HDFS-17891-fix-for-hostname-resolution-bug-with-data.patch create mode 100644 hadoop/hadoop/stackable/patches/3.4.3/patchable.toml create mode 100644 hadoop/hadoop/stackable/patches/3.5.0/0001-YARN-11527-Update-node.js.patch create mode 100644 hadoop/hadoop/stackable/patches/3.5.0/0002-Allow-overriding-datanode-registration-addresses.patch create mode 100644 hadoop/hadoop/stackable/patches/3.5.0/0003-Async-profiler-also-grab-itimer-events.patch create mode 100644 hadoop/hadoop/stackable/patches/3.5.0/0004-Update-CycloneDX-plugin.patch create mode 100644 hadoop/hadoop/stackable/patches/3.5.0/0005-YARN-11873-Update-nodejs-to-LTS-version.patch create mode 100644 hadoop/hadoop/stackable/patches/3.5.0/0006-HDFS-17891-fix-for-hostname-resolution-bug-with-data.patch create mode 100644 hadoop/hadoop/stackable/patches/3.5.0/patchable.toml create mode 100644 hadoop/hdfs-utils/stackable/patches/0.6.0/patchable.toml create mode 100644 shared/boost/Dockerfile create mode 100644 shared/boost/boil-config.toml create mode 100644 shared/protobuf/stackable/patches/3.21.12/patchable.toml create mode 100644 shared/protobuf/stackable/patches/3.25.5/patchable.toml diff --git a/.scripts/upload_new_boost_version.sh b/.scripts/upload_new_boost_version.sh new file mode 100755 index 000000000..0b333be7a --- /dev/null +++ b/.scripts/upload_new_boost_version.sh @@ -0,0 +1,59 @@ +#!/usr/bin/env bash +# This script mirrors the boostorg/boost source bundle for the given version to Nexus. 
+# The boost source bundle is architecture independent. +# It contains its own build system (b2) which is also built from source before building boost itself, so we don't need to worry about architecture specific builds. +# This artifact is used by the shared/boost local image. + + +set -euo pipefail + +VERSION=${1:?"Missing version number argument (arg 1)"} +NEXUS_USER=${2:?"Missing Nexus username argument (arg 2)"} + +read -r -s -p "Nexus Password: " NEXUS_PASSWORD +echo "" + +# https://stackoverflow.com/questions/4632028/how-to-create-a-temporary-directory +# Find the directory name of the script +DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" + +# the temp directory used, within $DIR +WORK_DIR=$(mktemp -d -p "$DIR") + +# check if tmp dir was created +if [[ ! "$WORK_DIR" || ! -d "$WORK_DIR" ]]; then + echo "Could not create temp dir" + exit 1 +fi + +# deletes the temp directory +function cleanup { + rm -rf "$WORK_DIR" +} + +# register the cleanup function to be called on the EXIT signal +trap cleanup EXIT + +cd "$WORK_DIR" || exit + +# boost does not currently publish signatures or SBOMs +BOOST_UNDERSCORE="$(echo "${VERSION}" | tr '.' '_')" +BOOST_TARBALL="boost_${BOOST_UNDERSCORE}.tar.bz2" +DOWNLOAD_URL="https://archives.boost.io/release/$VERSION/source/$BOOST_TARBALL" + +echo "Downloading boost" +if ! curl --fail -Ls -O "$DOWNLOAD_URL"; then + echo "Failed to download from $DOWNLOAD_URL" + exit 1 +fi + +FILE_NAME=$(basename "$DOWNLOAD_URL") + +echo "Uploading boost to Nexus" +if !
curl --fail -o /dev/null --progress-bar -u "$NEXUS_USER:$NEXUS_PASSWORD" --upload-file "$FILE_NAME" 'https://repo.stackable.tech/repository/packages/boost/'; then + echo "Failed to upload boost to Nexus" + exit 1 +fi + +echo "Successfully uploaded new version of boost ($VERSION) to Nexus" +echo "https://repo.stackable.tech/service/rest/repository/browse/packages/boost/" diff --git a/hadoop/boil-config.toml b/hadoop/boil-config.toml index 4c6ef2a64..e082c3fe2 100644 --- a/hadoop/boil-config.toml +++ b/hadoop/boil-config.toml @@ -20,4 +20,24 @@ java-devel = "11" [versions."3.4.2".build-arguments] async-profiler-version = "2.9" jmx-exporter-version = "1.3.0" -hdfs-utils-version = "0.5.0" +hdfs-utils-version = "0.6.0" + +[versions."3.4.3".local-images] +"hadoop/hadoop" = "3.4.3" +java-base = "11" +java-devel = "11" + +[versions."3.4.3".build-arguments] +async-profiler-version = "2.9" +jmx-exporter-version = "1.3.0" +hdfs-utils-version = "0.6.0" + +[versions."3.5.0".local-images] +"hadoop/hadoop" = "3.5.0" +java-base = "17" +java-devel = "17" + +[versions."3.5.0".build-arguments] +async-profiler-version = "2.9" +jmx-exporter-version = "1.3.0" +hdfs-utils-version = "0.6.0" diff --git a/hadoop/hadoop/Dockerfile b/hadoop/hadoop/Dockerfile index a17feec73..f5b8877bc 100644 --- a/hadoop/hadoop/Dockerfile +++ b/hadoop/hadoop/Dockerfile @@ -1,6 +1,7 @@ # syntax=docker/dockerfile:1.16.0@sha256:e2dd261f92e4b763d789984f6eab84be66ab4f5f08052316d8eb8f173593acf7 # check=error=true +FROM local-image/shared/boost AS boost-builder FROM local-image/java-devel AS hadoop-builder ARG PRODUCT_VERSION @@ -14,17 +15,22 @@ COPY --chown=${STACKABLE_USER_UID}:0 shared/protobuf/stackable/patches/patchable COPY --chown=${STACKABLE_USER_UID}:0 shared/protobuf/stackable/patches/${PROTOBUF_VERSION} /stackable/src/shared/protobuf/stackable/patches/${PROTOBUF_VERSION} RUN < 3.21 bundles abseil-cpp and utf8_range as git submodules + git submodule update --init --recursive + cmake -S . 
-B build -DCMAKE_POSITION_INDEPENDENT_CODE=ON -Dprotobuf_BUILD_TESTS=OFF + cmake --build build --parallel $(nproc) + cmake --install build --prefix /opt/protobuf + fi (cd .. && rm -r ${PROTOBUF_VERSION}) EOF diff --git a/hadoop/hadoop/boil-config.toml b/hadoop/hadoop/boil-config.toml index 7aa9fbeb6..7852ff62c 100644 --- a/hadoop/hadoop/boil-config.toml +++ b/hadoop/hadoop/boil-config.toml @@ -1,11 +1,30 @@ [versions."3.3.6".local-images] java-devel = "11" +"shared/boost" = "1.72.0" # I could not find a documented recommended version [versions."3.3.6".build-arguments] -protobuf-version = "3.7.1" +protobuf-version = "3.7.1" # https://github.com/apache/hadoop/blob/rel/release-3.3.6/BUILDING.txt + [versions."3.4.2".local-images] -java-devel = "11" +java-devel = "11" # https://cwiki.apache.org/confluence/display/HADOOP/Hadoop+Java+Versions +"shared/boost" = "1.72.0" # https://github.com/apache/hadoop/blob/rel/release-3.4.2/BUILDING.txt [versions."3.4.2".build-arguments] -protobuf-version = "3.7.1" +protobuf-version = "3.21.12" # https://github.com/apache/hadoop/blob/rel/release-3.4.2/BUILDING.txt + + +[versions."3.4.3".local-images] +java-devel = "11" # https://cwiki.apache.org/confluence/display/HADOOP/Hadoop+Java+Versions +"shared/boost" = "1.72.0" # https://github.com/apache/hadoop/blob/rel/release-3.4.3/BUILDING.txt + +[versions."3.4.3".build-arguments] +protobuf-version = "3.21.12" # https://github.com/apache/hadoop/blob/rel/release-3.4.3/BUILDING.txt + + +[versions."3.5.0".local-images] +java-devel = "17" # https://cwiki.apache.org/confluence/display/HADOOP/Hadoop+Java+Versions +"shared/boost" = "1.86.0" # https://github.com/apache/hadoop/blob/rel/release-3.5.0/BUILDING.txt + +[versions."3.5.0".build-arguments] +protobuf-version = "3.25.5" # https://github.com/apache/hadoop/blob/rel/release-3.5.0/BUILDING.txt diff --git a/hadoop/hadoop/stackable/patches/3.4.3/0001-YARN-11527-Update-node.js.patch 
b/hadoop/hadoop/stackable/patches/3.4.3/0001-YARN-11527-Update-node.js.patch new file mode 100644 index 000000000..9d08b7c2c --- /dev/null +++ b/hadoop/hadoop/stackable/patches/3.4.3/0001-YARN-11527-Update-node.js.patch @@ -0,0 +1,22 @@ +From 7ad31a922a9fbcecd884b4bdf5c416f6b0ea539e Mon Sep 17 00:00:00 2001 +From: Sebastian Bernauer +Date: Tue, 26 May 2026 15:40:40 +0200 +Subject: YARN-11527-Update-node.js + +--- + hadoop-project/pom.xml | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml +index 0813904f98a..f837b1f5201 100644 +--- a/hadoop-project/pom.xml ++++ b/hadoop-project/pom.xml +@@ -236,7 +236,7 @@ + 5.4.0 + 10.4 + 1.0-1 +- v12.22.1 ++ v14.17.0 + v1.22.5 + 1.10.13 + 1.20 diff --git a/hadoop/hadoop/stackable/patches/3.4.3/0002-Allow-overriding-datanode-registration-addresses.patch b/hadoop/hadoop/stackable/patches/3.4.3/0002-Allow-overriding-datanode-registration-addresses.patch new file mode 100644 index 000000000..508dbe02a --- /dev/null +++ b/hadoop/hadoop/stackable/patches/3.4.3/0002-Allow-overriding-datanode-registration-addresses.patch @@ -0,0 +1,259 @@ +From d1266f7390327e1882f2646ffe595a961b56b592 Mon Sep 17 00:00:00 2001 +From: xeniape +Date: Wed, 10 Sep 2025 14:36:20 +0200 +Subject: Allow-overriding-datanode-registration-addresses + +--- + .../org/apache/hadoop/hdfs/DFSConfigKeys.java | 9 +++ + .../blockmanagement/DatanodeManager.java | 43 +++++++----- + .../hadoop/hdfs/server/datanode/DNConf.java | 70 +++++++++++++++++++ + .../hadoop/hdfs/server/datanode/DataNode.java | 35 ++++++++-- + 4 files changed, 135 insertions(+), 22 deletions(-) + +diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java +index 6375778e4ea..7d49fbcfaa3 100755 +--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java ++++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java +@@ -152,6 +152,13 @@ public class DFSConfigKeys extends CommonConfigurationKeys { + public static final boolean DFS_DATANODE_DROP_CACHE_BEHIND_READS_DEFAULT = false; + public static final String DFS_DATANODE_USE_DN_HOSTNAME = "dfs.datanode.use.datanode.hostname"; + public static final boolean DFS_DATANODE_USE_DN_HOSTNAME_DEFAULT = false; ++ ++ public static final String DFS_DATANODE_REGISTERED_HOSTNAME = "dfs.datanode.registered.hostname"; ++ public static final String DFS_DATANODE_REGISTERED_DATA_PORT = "dfs.datanode.registered.port"; ++ public static final String DFS_DATANODE_REGISTERED_HTTP_PORT = "dfs.datanode.registered.http.port"; ++ public static final String DFS_DATANODE_REGISTERED_HTTPS_PORT = "dfs.datanode.registered.https.port"; ++ public static final String DFS_DATANODE_REGISTERED_IPC_PORT = "dfs.datanode.registered.ipc.port"; ++ + public static final String DFS_DATANODE_MAX_LOCKED_MEMORY_KEY = "dfs.datanode.max.locked.memory"; + public static final long DFS_DATANODE_MAX_LOCKED_MEMORY_DEFAULT = 0; + public static final String DFS_DATANODE_FSDATASETCACHE_MAX_THREADS_PER_VOLUME_KEY = "dfs.datanode.fsdatasetcache.max.threads.per.volume"; +@@ -491,6 +498,8 @@ public class DFSConfigKeys extends CommonConfigurationKeys { + public static final long DFS_DATANODE_PROCESS_COMMANDS_THRESHOLD_DEFAULT = + TimeUnit.SECONDS.toMillis(2); + ++ public static final String DFS_NAMENODE_DATANODE_REGISTRATION_UNSAFE_ALLOW_ADDRESS_OVERRIDE_KEY = "dfs.namenode.datanode.registration.unsafe.allow-address-override"; ++ public static final boolean DFS_NAMENODE_DATANODE_REGISTRATION_UNSAFE_ALLOW_ADDRESS_OVERRIDE_DEFAULT = false; + public static final String DFS_NAMENODE_DATANODE_REGISTRATION_IP_HOSTNAME_CHECK_KEY = "dfs.namenode.datanode.registration.ip-hostname-check"; + public static final boolean DFS_NAMENODE_DATANODE_REGISTRATION_IP_HOSTNAME_CHECK_DEFAULT = true; + +diff --git 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java +index ebd2fa992e9..c56f254478b 100644 +--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java ++++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java +@@ -181,6 +181,8 @@ public class DatanodeManager { + private boolean hasClusterEverBeenMultiRack = false; + + private final boolean checkIpHostnameInRegistration; ++ private final boolean allowRegistrationAddressOverride; ++ + /** + * Whether we should tell datanodes what to cache in replies to + * heartbeat messages. +@@ -314,6 +316,11 @@ public class DatanodeManager { + // Block invalidate limit also has some dependency on heartbeat interval. + // Check setBlockInvalidateLimit(). + setBlockInvalidateLimit(configuredBlockInvalidateLimit); ++ this.allowRegistrationAddressOverride = conf.getBoolean( ++ DFSConfigKeys.DFS_NAMENODE_DATANODE_REGISTRATION_UNSAFE_ALLOW_ADDRESS_OVERRIDE_KEY, ++ DFSConfigKeys.DFS_NAMENODE_DATANODE_REGISTRATION_UNSAFE_ALLOW_ADDRESS_OVERRIDE_DEFAULT); ++ LOG.info(DFSConfigKeys.DFS_NAMENODE_DATANODE_REGISTRATION_UNSAFE_ALLOW_ADDRESS_OVERRIDE_KEY ++ + "=" + allowRegistrationAddressOverride); + this.checkIpHostnameInRegistration = conf.getBoolean( + DFSConfigKeys.DFS_NAMENODE_DATANODE_REGISTRATION_IP_HOSTNAME_CHECK_KEY, + DFSConfigKeys.DFS_NAMENODE_DATANODE_REGISTRATION_IP_HOSTNAME_CHECK_DEFAULT); +@@ -1158,27 +1165,29 @@ public class DatanodeManager { + */ + public void registerDatanode(DatanodeRegistration nodeReg) + throws DisallowedDatanodeException, UnresolvedTopologyException { +- InetAddress dnAddress = Server.getRemoteIp(); +- if (dnAddress != null) { +- // Mostly called inside an RPC, update ip and peer hostname +- String hostname = 
dnAddress.getHostName(); +- String ip = dnAddress.getHostAddress(); +- if (checkIpHostnameInRegistration && !isNameResolved(dnAddress)) { +- // Reject registration of unresolved datanode to prevent performance +- // impact of repetitive DNS lookups later. +- final String message = "hostname cannot be resolved (ip=" +- + ip + ", hostname=" + hostname + ")"; +- LOG.warn("Unresolved datanode registration: " + message); +- throw new DisallowedDatanodeException(nodeReg, message); ++ if (!allowRegistrationAddressOverride) { ++ InetAddress dnAddress = Server.getRemoteIp(); ++ if (dnAddress != null) { ++ // Mostly called inside an RPC, update ip and peer hostname ++ String hostname = dnAddress.getHostName(); ++ String ip = dnAddress.getHostAddress(); ++ if (checkIpHostnameInRegistration && !isNameResolved(dnAddress)) { ++ // Reject registration of unresolved datanode to prevent performance ++ // impact of repetitive DNS lookups later. ++ final String message = "hostname cannot be resolved (ip=" ++ + ip + ", hostname=" + hostname + ")"; ++ LOG.warn("Unresolved datanode registration: " + message); ++ throw new DisallowedDatanodeException(nodeReg, message); ++ } ++ // update node registration with the ip and hostname from rpc request ++ nodeReg.setIpAddr(ip); ++ nodeReg.setPeerHostName(hostname); + } +- // update node registration with the ip and hostname from rpc request +- nodeReg.setIpAddr(ip); +- nodeReg.setPeerHostName(hostname); + } +- ++ + try { + nodeReg.setExportedKeys(blockManager.getBlockKeys()); +- ++ + // Checks if the node is not on the hosts list. If it is not, then + // it will be disallowed from registering. 
+ if (!hostConfigManager.isIncluded(nodeReg)) { +diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DNConf.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DNConf.java +index 21b92db3073..5d3437239ce 100644 +--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DNConf.java ++++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DNConf.java +@@ -101,6 +101,11 @@ public class DNConf { + final boolean syncOnClose; + final boolean encryptDataTransfer; + final boolean connectToDnViaHostname; ++ private final String registeredHostname; ++ private final int registeredDataPort; ++ private final int registeredHttpPort; ++ private final int registeredHttpsPort; ++ private final int registeredIpcPort; + final boolean overwriteDownstreamDerivedQOP; + private final boolean pmemCacheRecoveryEnabled; + +@@ -189,6 +194,11 @@ public class DNConf { + connectToDnViaHostname = getConf().getBoolean( + DFSConfigKeys.DFS_DATANODE_USE_DN_HOSTNAME, + DFSConfigKeys.DFS_DATANODE_USE_DN_HOSTNAME_DEFAULT); ++ registeredHostname = getConf().get(DFSConfigKeys.DFS_DATANODE_REGISTERED_HOSTNAME); ++ registeredDataPort = getConf().getInt(DFSConfigKeys.DFS_DATANODE_REGISTERED_DATA_PORT, -1); ++ registeredHttpPort = getConf().getInt(DFSConfigKeys.DFS_DATANODE_REGISTERED_HTTP_PORT, -1); ++ registeredHttpsPort = getConf().getInt(DFSConfigKeys.DFS_DATANODE_REGISTERED_HTTPS_PORT, -1); ++ registeredIpcPort = getConf().getInt(DFSConfigKeys.DFS_DATANODE_REGISTERED_IPC_PORT, -1); + this.blockReportInterval = getConf().getLong( + DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, + DFS_BLOCKREPORT_INTERVAL_MSEC_DEFAULT); +@@ -363,6 +373,66 @@ public class DNConf { + return connectToDnViaHostname; + } + ++ /** ++ * Returns a hostname to register with the cluster instead of the system ++ * hostname. 
++ * This is an expert setting and can be used in multihoming scenarios to ++ * override the detected hostname. ++ * ++ * @return null if the system hostname should be used, otherwise a hostname ++ */ ++ public String getRegisteredHostname() { ++ return registeredHostname; ++ } ++ ++ /** ++ * Returns a port number to register with the cluster instead of the ++ * data port that the node is listening on. ++ * This is an expert setting and can be used in multihoming scenarios to ++ * override the detected port. ++ * ++ * @return -1 if the actual port should be used, otherwise a port number ++ */ ++ public int getRegisteredDataPort() { ++ return registeredDataPort; ++ } ++ ++ /** ++ * Returns a port number to register with the cluster instead of the ++ * HTTP port that the node is listening on. ++ * This is an expert setting and can be used in multihoming scenarios to ++ * override the detected port. ++ * ++ * @return -1 if the actual port should be used, otherwise a port number ++ */ ++ public int getRegisteredHttpPort() { ++ return registeredHttpPort; ++ } ++ ++ /** ++ * Returns a port number to register with the cluster instead of the ++ * HTTPS port that the node is listening on. ++ * This is an expert setting and can be used in multihoming scenarios to ++ * override the detected port. ++ * ++ * @return -1 if the actual port should be used, otherwise a port number ++ */ ++ public int getRegisteredHttpsPort() { ++ return registeredHttpsPort; ++ } ++ ++ /** ++ * Returns a port number to register with the cluster instead of the ++ * IPC port that the node is listening on. ++ * This is an expert setting and can be used in multihoming scenarios to ++ * override the detected port. 
++ * ++ * @return -1 if the actual port should be used, otherwise a port number ++ */ ++ public int getRegisteredIpcPort() { ++ return registeredIpcPort; ++ } ++ + /** + * Returns socket timeout + * +diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java +index 956f5bbe519..22ae127d98d 100644 +--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java ++++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java +@@ -135,6 +135,7 @@ import java.util.HashSet; + import java.util.Iterator; + import java.util.List; + import java.util.Map; ++import java.util.Optional; + import java.util.Map.Entry; + import java.util.Set; + import java.util.UUID; +@@ -2076,11 +2077,35 @@ public class DataNode extends ReconfigurableBase + NodeType.DATA_NODE); + } + +- DatanodeID dnId = new DatanodeID( +- streamingAddr.getAddress().getHostAddress(), hostName, +- storage.getDatanodeUuid(), getXferPort(), getInfoPort(), +- infoSecurePort, getIpcPort()); +- return new DatanodeRegistration(dnId, storageInfo, ++ String registeredHostname = Optional ++ .ofNullable(dnConf.getRegisteredHostname()) ++ .orElseGet(() -> streamingAddr.getAddress().getHostAddress()); ++ int registeredDataPort = dnConf.getRegisteredDataPort(); ++ if (registeredDataPort == -1) { ++ registeredDataPort = getXferPort(); ++ } ++ int registeredHttpPort = dnConf.getRegisteredHttpPort(); ++ if (registeredHttpPort == -1) { ++ registeredHttpPort = getInfoPort(); ++ } ++ int registeredHttpsPort = dnConf.getRegisteredHttpsPort(); ++ if (registeredHttpsPort == -1) { ++ registeredHttpsPort = getInfoSecurePort(); ++ } ++ int registeredIpcPort = dnConf.getRegisteredIpcPort(); ++ if (registeredIpcPort == -1) { ++ registeredIpcPort = getIpcPort(); ++ } ++ ++ DatanodeID dnId = new 
DatanodeID(registeredHostname, ++ registeredHostname, ++ storage.getDatanodeUuid(), ++ registeredDataPort, ++ registeredHttpPort, ++ registeredHttpsPort, ++ registeredIpcPort); ++ ++ return new DatanodeRegistration(dnId, storageInfo, + new ExportedBlockKeys(), VersionInfo.getVersion()); + } + diff --git a/hadoop/hadoop/stackable/patches/3.4.3/0003-Async-profiler-also-grab-itimer-events.patch b/hadoop/hadoop/stackable/patches/3.4.3/0003-Async-profiler-also-grab-itimer-events.patch new file mode 100644 index 000000000..3cbccd3cd --- /dev/null +++ b/hadoop/hadoop/stackable/patches/3.4.3/0003-Async-profiler-also-grab-itimer-events.patch @@ -0,0 +1,29 @@ +From 8f29f6a32a0685577023398af3e7b2854ad47e96 Mon Sep 17 00:00:00 2001 +From: xeniape +Date: Wed, 10 Sep 2025 14:39:20 +0200 +Subject: Async-profiler-also-grab-itimer-events + +--- + .../src/main/java/org/apache/hadoop/http/ProfileServlet.java | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/ProfileServlet.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/ProfileServlet.java +index ce532741512..909892ff903 100644 +--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/ProfileServlet.java ++++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/ProfileServlet.java +@@ -76,6 +76,7 @@ import org.apache.hadoop.util.ProcessUtils; + * Following event types are supported (default is 'cpu') (NOTE: not all OS'es support all events) + * // Perf events: + * // cpu ++ * // itimer + * // page-faults + * // context-switches + * // cycles +@@ -118,6 +119,7 @@ public class ProfileServlet extends HttpServlet { + private enum Event { + + CPU("cpu"), ++ ITIMER("itimer"), + ALLOC("alloc"), + LOCK("lock"), + PAGE_FAULTS("page-faults"), diff --git a/hadoop/hadoop/stackable/patches/3.4.3/0004-HDFS-17378-Fix-missing-operationType-for-some-operat.patch 
b/hadoop/hadoop/stackable/patches/3.4.3/0004-HDFS-17378-Fix-missing-operationType-for-some-operat.patch new file mode 100644 index 000000000..1e5dd00b1 --- /dev/null +++ b/hadoop/hadoop/stackable/patches/3.4.3/0004-HDFS-17378-Fix-missing-operationType-for-some-operat.patch @@ -0,0 +1,199 @@ +From a79df04a30b4477372e850a7628bd47dcb665955 Mon Sep 17 00:00:00 2001 +From: xeniape +Date: Wed, 10 Sep 2025 14:40:41 +0200 +Subject: HDFS-17378-Fix-missing-operationType-for-some-operat + +--- + .../hdfs/server/namenode/FSNamesystem.java | 41 +++++++++++-------- + 1 file changed, 24 insertions(+), 17 deletions(-) + +diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +index cfc7f24381f..e9c5fc0da25 100644 +--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java ++++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +@@ -2618,15 +2618,16 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, + * @throws IOException + */ + BlockStoragePolicy getStoragePolicy(String src) throws IOException { ++ final String operationName = "getStoragePolicy"; + checkOperation(OperationCategory.READ); + final FSPermissionChecker pc = getPermissionChecker(); +- FSPermissionChecker.setOperationType(null); ++ FSPermissionChecker.setOperationType(operationName); + readLock(); + try { + checkOperation(OperationCategory.READ); + return FSDirAttrOp.getStoragePolicy(dir, pc, blockManager, src); + } finally { +- readUnlock("getStoragePolicy"); ++ readUnlock(operationName); + } + } + +@@ -2646,15 +2647,16 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, + } + + long getPreferredBlockSize(String src) throws IOException { ++ final String operationName = "getPreferredBlockSize"; + 
checkOperation(OperationCategory.READ); + final FSPermissionChecker pc = getPermissionChecker(); +- FSPermissionChecker.setOperationType(null); ++ FSPermissionChecker.setOperationType(operationName); + readLock(); + try { + checkOperation(OperationCategory.READ); + return FSDirAttrOp.getPreferredBlockSize(dir, pc, src); + } finally { +- readUnlock("getPreferredBlockSize"); ++ readUnlock(operationName); + } + } + +@@ -2707,7 +2709,6 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, + boolean createParent, short replication, long blockSize, + CryptoProtocolVersion[] supportedVersions, String ecPolicyName, + String storagePolicy, boolean logRetryCache) throws IOException { +- + HdfsFileStatus status; + try { + status = startFileInt(src, permissions, holder, clientMachine, flag, +@@ -2727,6 +2728,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, + long blockSize, CryptoProtocolVersion[] supportedVersions, + String ecPolicyName, String storagePolicy, boolean logRetryCache) + throws IOException { ++ final String operationName = "create"; + if (NameNode.stateChangeLog.isDebugEnabled()) { + StringBuilder builder = new StringBuilder(); + builder.append("DIR* NameSystem.startFile: src=").append(src) +@@ -2764,7 +2766,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, + + checkOperation(OperationCategory.WRITE); + final FSPermissionChecker pc = getPermissionChecker(); +- FSPermissionChecker.setOperationType(null); ++ FSPermissionChecker.setOperationType(operationName); + writeLock(); + try { + checkOperation(OperationCategory.WRITE); +@@ -2827,7 +2829,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, + dir.writeUnlock(); + } + } finally { +- writeUnlock("create", getLockReportInfoSupplier(src, null, stat)); ++ writeUnlock(operationName, getLockReportInfoSupplier(src, null, stat)); + // There might be transactions logged while trying to recover the lease. 
+ // They need to be sync'ed even when an exception was thrown. + if (!skipSync) { +@@ -2856,10 +2858,11 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, + */ + boolean recoverLease(String src, String holder, String clientMachine) + throws IOException { ++ final String operationName = "recoverLease"; + boolean skipSync = false; + checkOperation(OperationCategory.WRITE); + final FSPermissionChecker pc = getPermissionChecker(); +- FSPermissionChecker.setOperationType(null); ++ FSPermissionChecker.setOperationType(operationName); + writeLock(); + try { + checkOperation(OperationCategory.WRITE); +@@ -2880,7 +2883,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, + skipSync = true; + throw se; + } finally { +- writeUnlock("recoverLease"); ++ writeUnlock(operationName); + // There might be transactions logged while trying to recover the lease. + // They need to be sync'ed even when an exception was thrown. + if (!skipSync) { +@@ -3096,6 +3099,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, + final Set excludes, + final int numAdditionalNodes, final String clientName + ) throws IOException { ++ final String operationName = "getAdditionalDatanode"; + //check if the feature is enabled + dtpReplaceDatanodeOnFailure.checkEnabled(); + +@@ -3107,7 +3111,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, + final BlockType blockType; + checkOperation(OperationCategory.WRITE); + final FSPermissionChecker pc = getPermissionChecker(); +- FSPermissionChecker.setOperationType(null); ++ FSPermissionChecker.setOperationType(operationName); + readLock(); + try { + // Changing this operation category to WRITE instead of making getAdditionalDatanode as a +@@ -3133,7 +3137,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, + "src=%s, fileId=%d, blk=%s, clientName=%s, clientMachine=%s", + src, fileId, blk, clientName, clientMachine)); + } finally { +- 
readUnlock("getAdditionalDatanode"); ++ readUnlock(operationName); + } + + if (clientnode == null) { +@@ -3155,10 +3159,11 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, + */ + void abandonBlock(ExtendedBlock b, long fileId, String src, String holder) + throws IOException { ++ final String operationName = "abandonBlock"; + NameNode.stateChangeLog.debug("BLOCK* NameSystem.abandonBlock: {} of file {}", b, src); + checkOperation(OperationCategory.WRITE); + final FSPermissionChecker pc = getPermissionChecker(); +- FSPermissionChecker.setOperationType(null); ++ FSPermissionChecker.setOperationType(operationName); + writeLock(); + try { + checkOperation(OperationCategory.WRITE); +@@ -3167,7 +3172,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, + NameNode.stateChangeLog.debug( + "BLOCK* NameSystem.abandonBlock: {} is removed from pendingCreates", b); + } finally { +- writeUnlock("abandonBlock"); ++ writeUnlock(operationName); + } + getEditLog().logSync(); + } +@@ -3221,10 +3226,11 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, + boolean completeFile(final String src, String holder, + ExtendedBlock last, long fileId) + throws IOException { ++ final String operationName = "completeFile"; + boolean success = false; + checkOperation(OperationCategory.WRITE); + final FSPermissionChecker pc = getPermissionChecker(); +- FSPermissionChecker.setOperationType(null); ++ FSPermissionChecker.setOperationType(operationName); + writeLock(); + try { + checkOperation(OperationCategory.WRITE); +@@ -3232,7 +3238,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, + success = FSDirWriteFileOp.completeFile(this, pc, src, holder, last, + fileId); + } finally { +- writeUnlock("completeFile"); ++ writeUnlock(operationName); + } + getEditLog().logSync(); + if (success) { +@@ -3666,10 +3672,11 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, + */ + void fsync(String src, long fileId, 
String clientName, long lastBlockLength) + throws IOException { ++ final String operationName = "fsync"; + NameNode.stateChangeLog.info("BLOCK* fsync: " + src + " for " + clientName); + checkOperation(OperationCategory.WRITE); + final FSPermissionChecker pc = getPermissionChecker(); +- FSPermissionChecker.setOperationType(null); ++ FSPermissionChecker.setOperationType(operationName); + writeLock(); + try { + checkOperation(OperationCategory.WRITE); +@@ -3683,7 +3690,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, + } + FSDirWriteFileOp.persistBlocks(dir, src, pendingFile, false); + } finally { +- writeUnlock("fsync"); ++ writeUnlock(operationName); + } + getEditLog().logSync(); + } diff --git a/hadoop/hadoop/stackable/patches/3.4.3/0005-Update-CycloneDX-plugin.patch b/hadoop/hadoop/stackable/patches/3.4.3/0005-Update-CycloneDX-plugin.patch new file mode 100644 index 000000000..b479c49c2 --- /dev/null +++ b/hadoop/hadoop/stackable/patches/3.4.3/0005-Update-CycloneDX-plugin.patch @@ -0,0 +1,44 @@ +From a1b7a73b7ebe8ad84ab3f417d2b475dd2b51c192 Mon Sep 17 00:00:00 2001 +From: xeniape +Date: Wed, 10 Sep 2025 14:41:18 +0200 +Subject: Update-CycloneDX-plugin + +--- + pom.xml | 10 ++++++---- + 1 file changed, 6 insertions(+), 4 deletions(-) + +diff --git a/pom.xml b/pom.xml +index d34fb58e22c..d30fd28be1f 100644 +--- a/pom.xml ++++ b/pom.xml +@@ -121,7 +121,7 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/x + 4.2.0 + 1.1.1 + 3.10.1 +- 2.7.10 ++ 2.8.0 + + bash + +@@ -773,6 +773,11 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/x + org.cyclonedx + cyclonedx-maven-plugin + ${cyclonedx.version} ++ ++ application ++ 1.5 ++ false ++ + + + package +@@ -781,9 +786,6 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/x + + + +- +- xml +- + + + diff --git a/hadoop/hadoop/stackable/patches/3.4.3/0006-HADOOP-19352.-Hadoop-OSS-Connector-adds-support-for.patch 
b/hadoop/hadoop/stackable/patches/3.4.3/0006-HADOOP-19352.-Hadoop-OSS-Connector-adds-support-for.patch new file mode 100644 index 000000000..1cd05abc0 --- /dev/null +++ b/hadoop/hadoop/stackable/patches/3.4.3/0006-HADOOP-19352.-Hadoop-OSS-Connector-adds-support-for.patch @@ -0,0 +1,245 @@ +From 701aa813622cc8f78ac5c49ba9c4a9d638596657 Mon Sep 17 00:00:00 2001 +From: xeniape +Date: Wed, 10 Sep 2025 14:41:46 +0200 +Subject: HADOOP-19352.-Hadoop-OSS-Connector-adds-support-for- + +--- + hadoop-project/pom.xml | 2 +- + hadoop-tools/hadoop-aliyun/pom.xml | 26 +++++ + .../aliyun/oss/AliyunOSSFileSystemStore.java | 16 +++ + .../hadoop/fs/aliyun/oss/Constants.java | 15 +++ + .../fs/aliyun/oss/ITAliyunOSSSignatureV4.java | 98 +++++++++++++++++++ + .../src/test/resources/log4j.properties | 3 + + 6 files changed, 159 insertions(+), 1 deletion(-) + create mode 100644 hadoop-tools/hadoop-aliyun/src/test/java/org/apache/hadoop/fs/aliyun/oss/ITAliyunOSSSignatureV4.java + +diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml +index f837b1f5201..f8c7f2388f3 100644 +--- a/hadoop-project/pom.xml ++++ b/hadoop-project/pom.xml +@@ -1692,7 +1692,7 @@ + + com.aliyun.oss + aliyun-sdk-oss +- 3.13.2 ++ 3.18.1 + + + org.apache.httpcomponents +diff --git a/hadoop-tools/hadoop-aliyun/pom.xml b/hadoop-tools/hadoop-aliyun/pom.xml +index 50d6d4576c5..42fa66976af 100644 +--- a/hadoop-tools/hadoop-aliyun/pom.xml ++++ b/hadoop-tools/hadoop-aliyun/pom.xml +@@ -165,5 +165,31 @@ + test + jar + ++ ++ ++ org.junit.jupiter ++ junit-jupiter-api ++ test ++ ++ ++ org.junit.jupiter ++ junit-jupiter-engine ++ test ++ ++ ++ org.junit.jupiter ++ junit-jupiter-params ++ test ++ ++ ++ org.junit.platform ++ junit-platform-launcher ++ test ++ ++ ++ org.junit.vintage ++ junit-vintage-engine ++ test ++ + + +diff --git a/hadoop-tools/hadoop-aliyun/src/main/java/org/apache/hadoop/fs/aliyun/oss/AliyunOSSFileSystemStore.java 
b/hadoop-tools/hadoop-aliyun/src/main/java/org/apache/hadoop/fs/aliyun/oss/AliyunOSSFileSystemStore.java +index ccd5d1ea25c..30da259fd51 100644 +--- a/hadoop-tools/hadoop-aliyun/src/main/java/org/apache/hadoop/fs/aliyun/oss/AliyunOSSFileSystemStore.java ++++ b/hadoop-tools/hadoop-aliyun/src/main/java/org/apache/hadoop/fs/aliyun/oss/AliyunOSSFileSystemStore.java +@@ -73,6 +73,7 @@ import java.util.List; + import java.util.ListIterator; + import java.util.NoSuchElementException; + import java.util.stream.Collectors; ++import com.aliyun.oss.common.comm.SignVersion; + + import static org.apache.hadoop.fs.aliyun.oss.Constants.*; + +@@ -113,6 +114,16 @@ public class AliyunOSSFileSystemStore { + conf.get(USER_AGENT_PREFIX, USER_AGENT_PREFIX_DEFAULT) + ", Hadoop/" + + VersionInfo.getVersion()); + ++ String region = conf.get(REGION_KEY, ""); ++ String signatureVersion = conf.get(SIGNATURE_VERSION_KEY, SIGNATURE_VERSION_DEFAULT); ++ if ("V4".equalsIgnoreCase(signatureVersion)) { ++ clientConf.setSignatureVersion(SignVersion.V4); ++ if (StringUtils.isEmpty(region)) { ++ LOG.error("Signature version is V4 ,but region is empty."); ++ throw new IOException("SignVersion is V4 but region is empty"); ++ } ++ } ++ + String proxyHost = conf.getTrimmed(PROXY_HOST_KEY, ""); + int proxyPort = conf.getInt(PROXY_PORT_KEY, -1); + if (StringUtils.isNotEmpty(proxyHost)) { +@@ -171,6 +182,11 @@ public class AliyunOSSFileSystemStore { + statistics.incrementWriteOps(1); + } + ++ if (StringUtils.isNotEmpty(region)) { ++ ossClient.setRegion(region); ++ LOG.debug("ossClient setRegion {}", region); ++ } ++ + maxKeys = conf.getInt(MAX_PAGING_KEYS_KEY, MAX_PAGING_KEYS_DEFAULT); + int listVersion = conf.getInt(LIST_VERSION, DEFAULT_LIST_VERSION); + if (listVersion < 1 || listVersion > 2) { +diff --git a/hadoop-tools/hadoop-aliyun/src/main/java/org/apache/hadoop/fs/aliyun/oss/Constants.java b/hadoop-tools/hadoop-aliyun/src/main/java/org/apache/hadoop/fs/aliyun/oss/Constants.java +index 
baeb9199377..176669ed152 100644 +--- a/hadoop-tools/hadoop-aliyun/src/main/java/org/apache/hadoop/fs/aliyun/oss/Constants.java ++++ b/hadoop-tools/hadoop-aliyun/src/main/java/org/apache/hadoop/fs/aliyun/oss/Constants.java +@@ -211,4 +211,19 @@ public final class Constants { + public static final String LIST_VERSION = "fs.oss.list.version"; + + public static final int DEFAULT_LIST_VERSION = 2; ++ ++ /** ++ * OSS signature version. ++ */ ++ public static final String SIGNATURE_VERSION_KEY = "fs.oss.signatureversion"; ++ ++ /** ++ * OSS signature version DEFAULT {@value}. ++ */ ++ public static final String SIGNATURE_VERSION_DEFAULT = "V1"; ++ ++ /** ++ * OSS region {@value}. ++ */ ++ public static final String REGION_KEY = "fs.oss.region"; + } +diff --git a/hadoop-tools/hadoop-aliyun/src/test/java/org/apache/hadoop/fs/aliyun/oss/ITAliyunOSSSignatureV4.java b/hadoop-tools/hadoop-aliyun/src/test/java/org/apache/hadoop/fs/aliyun/oss/ITAliyunOSSSignatureV4.java +new file mode 100644 +index 00000000000..5070f2a5816 +--- /dev/null ++++ b/hadoop-tools/hadoop-aliyun/src/test/java/org/apache/hadoop/fs/aliyun/oss/ITAliyunOSSSignatureV4.java +@@ -0,0 +1,98 @@ ++/** ++ * Licensed to the Apache Software Foundation (ASF) under one ++ * or more contributor license agreements. See the NOTICE file ++ * distributed with this work for additional information ++ * regarding copyright ownership. The ASF licenses this file ++ * to you under the Apache License, Version 2.0 (the ++ * "License"); you may not use this file except in compliance ++ * with the License. You may obtain a copy of the License at ++ * ++ * http://www.apache.org/licenses/LICENSE-2.0 ++ * ++ * Unless required by applicable law or agreed to in writing, software ++ * distributed under the License is distributed on an "AS IS" BASIS, ++ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++ * See the License for the specific language governing permissions and ++ * limitations under the License. 
++ */ ++ ++package org.apache.hadoop.fs.aliyun.oss; ++ ++import org.apache.hadoop.conf.Configuration; ++import org.apache.hadoop.fs.FileStatus; ++import org.apache.hadoop.fs.Path; ++import org.junit.Before; ++import org.junit.Test; ++import org.slf4j.Logger; ++import org.slf4j.LoggerFactory; ++ ++import java.io.IOException; ++import java.net.URI; ++ ++import static org.apache.hadoop.fs.aliyun.oss.Constants.REGION_KEY; ++import static org.apache.hadoop.fs.aliyun.oss.Constants.SIGNATURE_VERSION_KEY; ++import static org.apache.hadoop.fs.contract.ContractTestUtils.createFile; ++import static org.apache.hadoop.fs.contract.ContractTestUtils.dataset; ++import static org.junit.Assert.*; ++import static org.junit.Assume.assumeNotNull; ++ ++/** ++ * Tests Aliyun OSS system. ++ */ ++public class ITAliyunOSSSignatureV4 { ++ private static final Logger LOG = LoggerFactory.getLogger(ITAliyunOSSSignatureV4.class); ++ private Configuration conf; ++ private URI testURI; ++ private Path testFile = new Path("ITAliyunOSSSignatureV4/atestr"); ++ ++ @Before ++ public void setUp() throws Exception { ++ conf = new Configuration(); ++ String bucketUri = conf.get("test.fs.oss.name"); ++ LOG.debug("bucketUri={}", bucketUri); ++ testURI = URI.create(bucketUri); ++ } ++ ++ @Test ++ public void testV4() throws IOException { ++ conf.set(SIGNATURE_VERSION_KEY, "V4"); ++ conf.set(REGION_KEY, "cn-hongkong"); ++ AliyunOSSFileSystem fs = new AliyunOSSFileSystem(); ++ fs.initialize(testURI, conf); ++ assumeNotNull(fs); ++ ++ createFile(fs, testFile, true, dataset(256, 0, 255)); ++ FileStatus status = fs.getFileStatus(testFile); ++ fs.delete(testFile); ++ fs.close(); ++ } ++ ++ @Test ++ public void testDefaultSignatureVersion() throws IOException { ++ AliyunOSSFileSystem fs = new AliyunOSSFileSystem(); ++ fs.initialize(testURI, conf); ++ assumeNotNull(fs); ++ ++ Path testFile2 = new Path("/test/atestr"); ++ createFile(fs, testFile2, true, dataset(256, 0, 255)); ++ FileStatus status = 
fs.getFileStatus(testFile2); ++ fs.delete(testFile2); ++ fs.close(); ++ } ++ ++ @Test ++ public void testV4WithoutRegion() throws IOException { ++ conf.set(SIGNATURE_VERSION_KEY, "V4"); ++ AliyunOSSFileSystem fs = new AliyunOSSFileSystem(); ++ IOException expectedException = null; ++ try { ++ fs.initialize(testURI, conf); ++ } catch (IOException e) { ++ LOG.warn("use V4 , but do not set region, get exception={}", e); ++ expectedException = e; ++ assertEquals("use V4 , but do not set region", e.getMessage(), ++ "SignVersion is V4 but region is empty"); ++ } ++ assertNotNull(expectedException); ++ } ++} +diff --git a/hadoop-tools/hadoop-aliyun/src/test/resources/log4j.properties b/hadoop-tools/hadoop-aliyun/src/test/resources/log4j.properties +index bb5cbe5ec32..2167f68811a 100644 +--- a/hadoop-tools/hadoop-aliyun/src/test/resources/log4j.properties ++++ b/hadoop-tools/hadoop-aliyun/src/test/resources/log4j.properties +@@ -21,3 +21,6 @@ log4j.threshold=ALL + log4j.appender.stdout=org.apache.log4j.ConsoleAppender + log4j.appender.stdout.layout=org.apache.log4j.PatternLayout + log4j.appender.stdout.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n ++ ++# Log all oss classes ++log4j.logger.org.apache.hadoop.fs.aliyun.oss=DEBUG +\ No newline at end of file diff --git a/hadoop/hadoop/stackable/patches/3.4.3/0007-YARN-11873-Update-nodejs-to-LTS-version.patch b/hadoop/hadoop/stackable/patches/3.4.3/0007-YARN-11873-Update-nodejs-to-LTS-version.patch new file mode 100644 index 000000000..05965b1e5 --- /dev/null +++ b/hadoop/hadoop/stackable/patches/3.4.3/0007-YARN-11873-Update-nodejs-to-LTS-version.patch @@ -0,0 +1,22 @@ +From 8177d73d3dd9260f36c8bd349a0b6602e1a8fc2c Mon Sep 17 00:00:00 2001 +From: Sebastian Bernauer +Date: Tue, 26 May 2026 15:43:29 +0200 +Subject: YARN-11873 Update nodejs to LTS version + +--- + hadoop-project/pom.xml | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml +index 
f8c7f2388f3..4522eea9fe0 100644 +--- a/hadoop-project/pom.xml ++++ b/hadoop-project/pom.xml +@@ -236,7 +236,7 @@ + 5.4.0 + 10.4 + 1.0-1 +- v14.17.0 ++ v22.20.0 + v1.22.5 + 1.10.13 + 1.20 diff --git a/hadoop/hadoop/stackable/patches/3.4.3/0008-HDFS-17891-fix-for-hostname-resolution-bug-with-data.patch b/hadoop/hadoop/stackable/patches/3.4.3/0008-HDFS-17891-fix-for-hostname-resolution-bug-with-data.patch new file mode 100644 index 000000000..7e036ad78 --- /dev/null +++ b/hadoop/hadoop/stackable/patches/3.4.3/0008-HDFS-17891-fix-for-hostname-resolution-bug-with-data.patch @@ -0,0 +1,48 @@ +From dff0783ae6c7026515a264bbf0fda137331a2864 Mon Sep 17 00:00:00 2001 +From: Jim Halfpenny +Date: Mon, 16 Mar 2026 09:18:46 +0000 +Subject: HDFS-17891 fix for hostname resolution bug with datanodes + +--- + .../hadoop/hdfs/server/blockmanagement/HostSet.java | 11 +++++++++-- + 1 file changed, 9 insertions(+), 2 deletions(-) + +diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/HostSet.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/HostSet.java +index d12e5fbae13..3da753f1cb0 100644 +--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/HostSet.java ++++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/HostSet.java +@@ -18,10 +18,11 @@ + package org.apache.hadoop.hdfs.server.blockmanagement; + + +-import org.apache.hadoop.util.Preconditions; + import org.apache.hadoop.thirdparty.com.google.common.collect.HashMultimap; + import org.apache.hadoop.thirdparty.com.google.common.collect.Multimap; + import org.apache.hadoop.thirdparty.com.google.common.collect.UnmodifiableIterator; ++import org.slf4j.Logger; ++import org.slf4j.LoggerFactory; + + import java.net.InetAddress; + import java.net.InetSocketAddress; +@@ -38,6 +39,8 @@ import java.util.Map; + * .getPort() || B.getPort() == 0. 
+ */ + public class HostSet implements Iterable { ++ private static final Logger LOG = LoggerFactory.getLogger(HostSet.class); ++ + // Host -> lists of ports + private final Multimap addrs = HashMultimap.create(); + +@@ -72,7 +75,11 @@ public class HostSet implements Iterable { + } + + void add(InetSocketAddress addr) { +- Preconditions.checkArgument(!addr.isUnresolved()); ++ LOG.debug("Adding address to HostSet: {}", addr); ++ if (addr.isUnresolved()) { ++ LOG.warn("Unresolved address not added to HostSet: {}", addr); ++ return; ++ } + addrs.put(addr.getAddress(), addr.getPort()); + } + diff --git a/hadoop/hadoop/stackable/patches/3.4.3/patchable.toml b/hadoop/hadoop/stackable/patches/3.4.3/patchable.toml new file mode 100644 index 000000000..45b693b8a --- /dev/null +++ b/hadoop/hadoop/stackable/patches/3.4.3/patchable.toml @@ -0,0 +1,2 @@ +mirror = "https://github.com/stackabletech/hadoop.git" +base = "9d50c6884666e794e45102260a4017bb31802e1b" diff --git a/hadoop/hadoop/stackable/patches/3.5.0/0001-YARN-11527-Update-node.js.patch b/hadoop/hadoop/stackable/patches/3.5.0/0001-YARN-11527-Update-node.js.patch new file mode 100644 index 000000000..14faf2a73 --- /dev/null +++ b/hadoop/hadoop/stackable/patches/3.5.0/0001-YARN-11527-Update-node.js.patch @@ -0,0 +1,22 @@ +From 151c1c7e4e47849a940c6c60896269f1c7bc11c8 Mon Sep 17 00:00:00 2001 +From: Sebastian Bernauer +Date: Tue, 26 May 2026 15:40:40 +0200 +Subject: YARN-11527-Update-node.js + +--- + hadoop-project/pom.xml | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml +index 4b9c22319cb..23657fb6e80 100644 +--- a/hadoop-project/pom.xml ++++ b/hadoop-project/pom.xml +@@ -236,7 +236,7 @@ + 5.4.0 + 10.4 + 1.0-1 +- v12.22.1 ++ v14.17.0 + v1.22.5 + 1.10.13 + 1.20 diff --git a/hadoop/hadoop/stackable/patches/3.5.0/0002-Allow-overriding-datanode-registration-addresses.patch 
b/hadoop/hadoop/stackable/patches/3.5.0/0002-Allow-overriding-datanode-registration-addresses.patch new file mode 100644 index 000000000..69ce7c696 --- /dev/null +++ b/hadoop/hadoop/stackable/patches/3.5.0/0002-Allow-overriding-datanode-registration-addresses.patch @@ -0,0 +1,259 @@ +From 7e5fff59ac2e2e73a6ede36fb4618c92ddf71302 Mon Sep 17 00:00:00 2001 +From: xeniape +Date: Wed, 10 Sep 2025 14:36:20 +0200 +Subject: Allow-overriding-datanode-registration-addresses + +--- + .../org/apache/hadoop/hdfs/DFSConfigKeys.java | 9 +++ + .../blockmanagement/DatanodeManager.java | 43 +++++++----- + .../hadoop/hdfs/server/datanode/DNConf.java | 70 +++++++++++++++++++ + .../hadoop/hdfs/server/datanode/DataNode.java | 35 ++++++++-- + 4 files changed, 135 insertions(+), 22 deletions(-) + +diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java +index 96226f45f6a..2dd29176df7 100755 +--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java ++++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java +@@ -154,6 +154,13 @@ public class DFSConfigKeys extends CommonConfigurationKeys { + public static final boolean DFS_DATANODE_DROP_CACHE_BEHIND_READS_DEFAULT = false; + public static final String DFS_DATANODE_USE_DN_HOSTNAME = "dfs.datanode.use.datanode.hostname"; + public static final boolean DFS_DATANODE_USE_DN_HOSTNAME_DEFAULT = false; ++ ++ public static final String DFS_DATANODE_REGISTERED_HOSTNAME = "dfs.datanode.registered.hostname"; ++ public static final String DFS_DATANODE_REGISTERED_DATA_PORT = "dfs.datanode.registered.port"; ++ public static final String DFS_DATANODE_REGISTERED_HTTP_PORT = "dfs.datanode.registered.http.port"; ++ public static final String DFS_DATANODE_REGISTERED_HTTPS_PORT = "dfs.datanode.registered.https.port"; ++ public static final String 
DFS_DATANODE_REGISTERED_IPC_PORT = "dfs.datanode.registered.ipc.port"; ++ + public static final String DFS_DATANODE_MAX_LOCKED_MEMORY_KEY = "dfs.datanode.max.locked.memory"; + public static final long DFS_DATANODE_MAX_LOCKED_MEMORY_DEFAULT = 0; + public static final String DFS_DATANODE_FSDATASETCACHE_MAX_THREADS_PER_VOLUME_KEY = "dfs.datanode.fsdatasetcache.max.threads.per.volume"; +@@ -502,6 +509,8 @@ public class DFSConfigKeys extends CommonConfigurationKeys { + public static final long DFS_DATANODE_PROCESS_COMMANDS_THRESHOLD_DEFAULT = + TimeUnit.SECONDS.toMillis(2); + ++ public static final String DFS_NAMENODE_DATANODE_REGISTRATION_UNSAFE_ALLOW_ADDRESS_OVERRIDE_KEY = "dfs.namenode.datanode.registration.unsafe.allow-address-override"; ++ public static final boolean DFS_NAMENODE_DATANODE_REGISTRATION_UNSAFE_ALLOW_ADDRESS_OVERRIDE_DEFAULT = false; + public static final String DFS_NAMENODE_DATANODE_REGISTRATION_IP_HOSTNAME_CHECK_KEY = "dfs.namenode.datanode.registration.ip-hostname-check"; + public static final boolean DFS_NAMENODE_DATANODE_REGISTRATION_IP_HOSTNAME_CHECK_DEFAULT = true; + +diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java +index 01f1af9624d..4e7c722d03f 100644 +--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java ++++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java +@@ -182,6 +182,8 @@ public class DatanodeManager { + private boolean hasClusterEverBeenMultiRack = false; + + private final boolean checkIpHostnameInRegistration; ++ private final boolean allowRegistrationAddressOverride; ++ + /** + * Whether we should tell datanodes what to cache in replies to + * heartbeat messages. 
+@@ -317,6 +319,11 @@ public class DatanodeManager { + // Block invalidate limit also has some dependency on heartbeat interval. + // Check setBlockInvalidateLimit(). + setBlockInvalidateLimit(configuredBlockInvalidateLimit); ++ this.allowRegistrationAddressOverride = conf.getBoolean( ++ DFSConfigKeys.DFS_NAMENODE_DATANODE_REGISTRATION_UNSAFE_ALLOW_ADDRESS_OVERRIDE_KEY, ++ DFSConfigKeys.DFS_NAMENODE_DATANODE_REGISTRATION_UNSAFE_ALLOW_ADDRESS_OVERRIDE_DEFAULT); ++ LOG.info(DFSConfigKeys.DFS_NAMENODE_DATANODE_REGISTRATION_UNSAFE_ALLOW_ADDRESS_OVERRIDE_KEY ++ + "=" + allowRegistrationAddressOverride); + this.checkIpHostnameInRegistration = conf.getBoolean( + DFSConfigKeys.DFS_NAMENODE_DATANODE_REGISTRATION_IP_HOSTNAME_CHECK_KEY, + DFSConfigKeys.DFS_NAMENODE_DATANODE_REGISTRATION_IP_HOSTNAME_CHECK_DEFAULT); +@@ -1184,27 +1191,29 @@ public class DatanodeManager { + */ + public void registerDatanode(DatanodeRegistration nodeReg) + throws DisallowedDatanodeException, UnresolvedTopologyException { +- InetAddress dnAddress = Server.getRemoteIp(); +- if (dnAddress != null) { +- // Mostly called inside an RPC, update ip and peer hostname +- String hostname = dnAddress.getHostName(); +- String ip = dnAddress.getHostAddress(); +- if (checkIpHostnameInRegistration && !isNameResolved(dnAddress)) { +- // Reject registration of unresolved datanode to prevent performance +- // impact of repetitive DNS lookups later. 
+- final String message = "hostname cannot be resolved (ip=" +- + ip + ", hostname=" + hostname + ")"; +- LOG.warn("Unresolved datanode registration: " + message); +- throw new DisallowedDatanodeException(nodeReg, message); ++ if (!allowRegistrationAddressOverride) { ++ InetAddress dnAddress = Server.getRemoteIp(); ++ if (dnAddress != null) { ++ // Mostly called inside an RPC, update ip and peer hostname ++ String hostname = dnAddress.getHostName(); ++ String ip = dnAddress.getHostAddress(); ++ if (checkIpHostnameInRegistration && !isNameResolved(dnAddress)) { ++ // Reject registration of unresolved datanode to prevent performance ++ // impact of repetitive DNS lookups later. ++ final String message = "hostname cannot be resolved (ip=" ++ + ip + ", hostname=" + hostname + ")"; ++ LOG.warn("Unresolved datanode registration: " + message); ++ throw new DisallowedDatanodeException(nodeReg, message); ++ } ++ // update node registration with the ip and hostname from rpc request ++ nodeReg.setIpAddr(ip); ++ nodeReg.setPeerHostName(hostname); + } +- // update node registration with the ip and hostname from rpc request +- nodeReg.setIpAddr(ip); +- nodeReg.setPeerHostName(hostname); + } +- ++ + try { + nodeReg.setExportedKeys(blockManager.getBlockKeys()); +- ++ + // Checks if the node is not on the hosts list. If it is not, then + // it will be disallowed from registering. 
+ if (!hostConfigManager.isIncluded(nodeReg)) { +diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DNConf.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DNConf.java +index 21b92db3073..5d3437239ce 100644 +--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DNConf.java ++++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DNConf.java +@@ -101,6 +101,11 @@ public class DNConf { + final boolean syncOnClose; + final boolean encryptDataTransfer; + final boolean connectToDnViaHostname; ++ private final String registeredHostname; ++ private final int registeredDataPort; ++ private final int registeredHttpPort; ++ private final int registeredHttpsPort; ++ private final int registeredIpcPort; + final boolean overwriteDownstreamDerivedQOP; + private final boolean pmemCacheRecoveryEnabled; + +@@ -189,6 +194,11 @@ public class DNConf { + connectToDnViaHostname = getConf().getBoolean( + DFSConfigKeys.DFS_DATANODE_USE_DN_HOSTNAME, + DFSConfigKeys.DFS_DATANODE_USE_DN_HOSTNAME_DEFAULT); ++ registeredHostname = getConf().get(DFSConfigKeys.DFS_DATANODE_REGISTERED_HOSTNAME); ++ registeredDataPort = getConf().getInt(DFSConfigKeys.DFS_DATANODE_REGISTERED_DATA_PORT, -1); ++ registeredHttpPort = getConf().getInt(DFSConfigKeys.DFS_DATANODE_REGISTERED_HTTP_PORT, -1); ++ registeredHttpsPort = getConf().getInt(DFSConfigKeys.DFS_DATANODE_REGISTERED_HTTPS_PORT, -1); ++ registeredIpcPort = getConf().getInt(DFSConfigKeys.DFS_DATANODE_REGISTERED_IPC_PORT, -1); + this.blockReportInterval = getConf().getLong( + DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, + DFS_BLOCKREPORT_INTERVAL_MSEC_DEFAULT); +@@ -363,6 +373,66 @@ public class DNConf { + return connectToDnViaHostname; + } + ++ /** ++ * Returns a hostname to register with the cluster instead of the system ++ * hostname. 
++ * This is an expert setting and can be used in multihoming scenarios to ++ * override the detected hostname. ++ * ++ * @return null if the system hostname should be used, otherwise a hostname ++ */ ++ public String getRegisteredHostname() { ++ return registeredHostname; ++ } ++ ++ /** ++ * Returns a port number to register with the cluster instead of the ++ * data port that the node is listening on. ++ * This is an expert setting and can be used in multihoming scenarios to ++ * override the detected port. ++ * ++ * @return -1 if the actual port should be used, otherwise a port number ++ */ ++ public int getRegisteredDataPort() { ++ return registeredDataPort; ++ } ++ ++ /** ++ * Returns a port number to register with the cluster instead of the ++ * HTTP port that the node is listening on. ++ * This is an expert setting and can be used in multihoming scenarios to ++ * override the detected port. ++ * ++ * @return -1 if the actual port should be used, otherwise a port number ++ */ ++ public int getRegisteredHttpPort() { ++ return registeredHttpPort; ++ } ++ ++ /** ++ * Returns a port number to register with the cluster instead of the ++ * HTTPS port that the node is listening on. ++ * This is an expert setting and can be used in multihoming scenarios to ++ * override the detected port. ++ * ++ * @return -1 if the actual port should be used, otherwise a port number ++ */ ++ public int getRegisteredHttpsPort() { ++ return registeredHttpsPort; ++ } ++ ++ /** ++ * Returns a port number to register with the cluster instead of the ++ * IPC port that the node is listening on. ++ * This is an expert setting and can be used in multihoming scenarios to ++ * override the detected port. 
++ * ++ * @return -1 if the actual port should be used, otherwise a port number ++ */ ++ public int getRegisteredIpcPort() { ++ return registeredIpcPort; ++ } ++ + /** + * Returns socket timeout + * +diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java +index 3a1b1e07f36..6d4c356dfb6 100644 +--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java ++++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java +@@ -135,6 +135,7 @@ import java.util.HashSet; + import java.util.Iterator; + import java.util.List; + import java.util.Map; ++import java.util.Optional; + import java.util.Map.Entry; + import java.util.Set; + import java.util.UUID; +@@ -2077,11 +2078,35 @@ public class DataNode extends ReconfigurableBase + NodeType.DATA_NODE); + } + +- DatanodeID dnId = new DatanodeID( +- streamingAddr.getAddress().getHostAddress(), hostName, +- storage.getDatanodeUuid(), getXferPort(), getInfoPort(), +- infoSecurePort, getIpcPort()); +- return new DatanodeRegistration(dnId, storageInfo, ++ String registeredHostname = Optional ++ .ofNullable(dnConf.getRegisteredHostname()) ++ .orElseGet(() -> streamingAddr.getAddress().getHostAddress()); ++ int registeredDataPort = dnConf.getRegisteredDataPort(); ++ if (registeredDataPort == -1) { ++ registeredDataPort = getXferPort(); ++ } ++ int registeredHttpPort = dnConf.getRegisteredHttpPort(); ++ if (registeredHttpPort == -1) { ++ registeredHttpPort = getInfoPort(); ++ } ++ int registeredHttpsPort = dnConf.getRegisteredHttpsPort(); ++ if (registeredHttpsPort == -1) { ++ registeredHttpsPort = getInfoSecurePort(); ++ } ++ int registeredIpcPort = dnConf.getRegisteredIpcPort(); ++ if (registeredIpcPort == -1) { ++ registeredIpcPort = getIpcPort(); ++ } ++ ++ DatanodeID dnId = new 
DatanodeID(registeredHostname, ++ registeredHostname, ++ storage.getDatanodeUuid(), ++ registeredDataPort, ++ registeredHttpPort, ++ registeredHttpsPort, ++ registeredIpcPort); ++ ++ return new DatanodeRegistration(dnId, storageInfo, + new ExportedBlockKeys(), VersionInfo.getVersion()); + } + diff --git a/hadoop/hadoop/stackable/patches/3.5.0/0003-Async-profiler-also-grab-itimer-events.patch b/hadoop/hadoop/stackable/patches/3.5.0/0003-Async-profiler-also-grab-itimer-events.patch new file mode 100644 index 000000000..25acd19ff --- /dev/null +++ b/hadoop/hadoop/stackable/patches/3.5.0/0003-Async-profiler-also-grab-itimer-events.patch @@ -0,0 +1,29 @@ +From eabd444dec4a472f4d4d3bb1a449799c14aea2d5 Mon Sep 17 00:00:00 2001 +From: xeniape +Date: Wed, 10 Sep 2025 14:39:20 +0200 +Subject: Async-profiler-also-grab-itimer-events + +--- + .../src/main/java/org/apache/hadoop/http/ProfileServlet.java | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/ProfileServlet.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/ProfileServlet.java +index ce532741512..909892ff903 100644 +--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/ProfileServlet.java ++++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/ProfileServlet.java +@@ -76,6 +76,7 @@ import org.apache.hadoop.util.ProcessUtils; + * Following event types are supported (default is 'cpu') (NOTE: not all OS'es support all events) + * // Perf events: + * // cpu ++ * // itimer + * // page-faults + * // context-switches + * // cycles +@@ -118,6 +119,7 @@ public class ProfileServlet extends HttpServlet { + private enum Event { + + CPU("cpu"), ++ ITIMER("itimer"), + ALLOC("alloc"), + LOCK("lock"), + PAGE_FAULTS("page-faults"), diff --git a/hadoop/hadoop/stackable/patches/3.5.0/0004-Update-CycloneDX-plugin.patch 
b/hadoop/hadoop/stackable/patches/3.5.0/0004-Update-CycloneDX-plugin.patch new file mode 100644 index 000000000..073949ac1 --- /dev/null +++ b/hadoop/hadoop/stackable/patches/3.5.0/0004-Update-CycloneDX-plugin.patch @@ -0,0 +1,35 @@ +From dce9e64ac4ff8906108a1853e6d1003d3fe9a313 Mon Sep 17 00:00:00 2001 +From: xeniape +Date: Wed, 10 Sep 2025 14:41:18 +0200 +Subject: Update-CycloneDX-plugin + +--- + pom.xml | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) + +diff --git a/pom.xml b/pom.xml +index bb9fb62180d..62dd33d6485 100644 +--- a/pom.xml ++++ b/pom.xml +@@ -830,6 +830,11 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/x + org.cyclonedx + cyclonedx-maven-plugin + ${cyclonedx.version} ++ ++ application ++ 1.5 ++ false ++ + + + package +@@ -838,9 +843,6 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/x + + + +- +- xml +- + + + diff --git a/hadoop/hadoop/stackable/patches/3.5.0/0005-YARN-11873-Update-nodejs-to-LTS-version.patch b/hadoop/hadoop/stackable/patches/3.5.0/0005-YARN-11873-Update-nodejs-to-LTS-version.patch new file mode 100644 index 000000000..ae977e796 --- /dev/null +++ b/hadoop/hadoop/stackable/patches/3.5.0/0005-YARN-11873-Update-nodejs-to-LTS-version.patch @@ -0,0 +1,22 @@ +From a8543f42f9cb3d88b7c030df1870de809a07949b Mon Sep 17 00:00:00 2001 +From: Sebastian Bernauer +Date: Tue, 26 May 2026 15:43:29 +0200 +Subject: YARN-11873 Update nodejs to LTS version + +--- + hadoop-project/pom.xml | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml +index 23657fb6e80..0880f5acdb5 100644 +--- a/hadoop-project/pom.xml ++++ b/hadoop-project/pom.xml +@@ -236,7 +236,7 @@ + 5.4.0 + 10.4 + 1.0-1 +- v14.17.0 ++ v22.20.0 + v1.22.5 + 1.10.13 + 1.20 diff --git a/hadoop/hadoop/stackable/patches/3.5.0/0006-HDFS-17891-fix-for-hostname-resolution-bug-with-data.patch 
b/hadoop/hadoop/stackable/patches/3.5.0/0006-HDFS-17891-fix-for-hostname-resolution-bug-with-data.patch new file mode 100644 index 000000000..e74826088 --- /dev/null +++ b/hadoop/hadoop/stackable/patches/3.5.0/0006-HDFS-17891-fix-for-hostname-resolution-bug-with-data.patch @@ -0,0 +1,48 @@ +From 065f4c5ccbd462ee04fb7a63fb38026c7c7d0628 Mon Sep 17 00:00:00 2001 +From: Jim Halfpenny +Date: Mon, 16 Mar 2026 09:18:46 +0000 +Subject: HDFS-17891 fix for hostname resolution bug with datanodes + +--- + .../hadoop/hdfs/server/blockmanagement/HostSet.java | 11 +++++++++-- + 1 file changed, 9 insertions(+), 2 deletions(-) + +diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/HostSet.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/HostSet.java +index d12e5fbae13..3da753f1cb0 100644 +--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/HostSet.java ++++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/HostSet.java +@@ -18,10 +18,11 @@ + package org.apache.hadoop.hdfs.server.blockmanagement; + + +-import org.apache.hadoop.util.Preconditions; + import org.apache.hadoop.thirdparty.com.google.common.collect.HashMultimap; + import org.apache.hadoop.thirdparty.com.google.common.collect.Multimap; + import org.apache.hadoop.thirdparty.com.google.common.collect.UnmodifiableIterator; ++import org.slf4j.Logger; ++import org.slf4j.LoggerFactory; + + import java.net.InetAddress; + import java.net.InetSocketAddress; +@@ -38,6 +39,8 @@ import java.util.Map; + * .getPort() || B.getPort() == 0. 
+ */ + public class HostSet implements Iterable { ++ private static final Logger LOG = LoggerFactory.getLogger(HostSet.class); ++ + // Host -> lists of ports + private final Multimap addrs = HashMultimap.create(); + +@@ -72,7 +75,11 @@ public class HostSet implements Iterable { + } + + void add(InetSocketAddress addr) { +- Preconditions.checkArgument(!addr.isUnresolved()); ++ LOG.debug("Adding address to HostSet: {}", addr); ++ if (addr.isUnresolved()) { ++ LOG.warn("Unresolved address not added to HostSet: {}", addr); ++ return; ++ } + addrs.put(addr.getAddress(), addr.getPort()); + } + diff --git a/hadoop/hadoop/stackable/patches/3.5.0/patchable.toml b/hadoop/hadoop/stackable/patches/3.5.0/patchable.toml new file mode 100644 index 000000000..972e228e7 --- /dev/null +++ b/hadoop/hadoop/stackable/patches/3.5.0/patchable.toml @@ -0,0 +1,2 @@ +mirror = "https://github.com/stackabletech/hadoop.git" +base = "dbcc7cd797100e6b32cd84f85b53a5193a5f9af0" diff --git a/hadoop/hdfs-utils/stackable/patches/0.6.0/patchable.toml b/hadoop/hdfs-utils/stackable/patches/0.6.0/patchable.toml new file mode 100644 index 000000000..406dfb637 --- /dev/null +++ b/hadoop/hdfs-utils/stackable/patches/0.6.0/patchable.toml @@ -0,0 +1,2 @@ +# FIXME: Use released commit +base = "b3ebd854f36395cfc3ebbbb71f868f2a6ec7b0fa" diff --git a/rust/patchable/README.md b/rust/patchable/README.md index 83ebc8064..9bbcf310d 100644 --- a/rust/patchable/README.md +++ b/rust/patchable/README.md @@ -200,7 +200,7 @@ cargo patchable checkout druid 26.0.0 cargo patchable init version druid 28.0.0 --upstream https://github.com/apache/druid.git --base druid-28.0.0 # Create and go to the worktree for the new version -pushd $(cargo patchable checkout druid 28.0.0) +pushd $(cargo patchable checkout hadoop/hadoop 3.5.0) # Cherry pick the old patch series git cherry-pick patchable/base/26.0.0..patchable/26.0.0 diff --git a/shared/boost/Dockerfile b/shared/boost/Dockerfile new file mode 100644 index 000000000..7c517e660 --- 
/dev/null +++ b/shared/boost/Dockerfile @@ -0,0 +1,57 @@ +# syntax=docker/dockerfile:1.16.0@sha256:e2dd261f92e4b763d789984f6eab84be66ab4f5f08052316d8eb8f173593acf7 +# check=error=true + +# The boost library is a runtime dependency of the Hadoop native code +# but is not available in the RedHat UBI10 or EPEL10 repositories, so we need to build +# it ourselves. +# Additionally, UBI ships a single supported boost version, which works for some product versions, +# but not for all. +# +# NOTE: We use a published source bundle instead of the patchable workflow +# because boost uses git submodules for its build system +# and patchable doesn't support these. +# The source bundle contains everything needed to build b2 (the build system) +# and boost except for dependencies which are automatically discovered. +# +FROM local-image/stackable-base AS boost-builder + +ARG BOOST_VERSION + +WORKDIR /tmp + +RUN <= 2.34 makes PTHREAD_STACK_MIN a sysconf() call instead of a compile-time +# constant, which breaks the `#if PTHREAD_STACK_MIN > 0` preprocessor check in older +# boost versions (e.g. 1.72.0). Apply the upstream fix: use `#ifdef`. +# See: https://github.com/boostorg/thread/commit/74fb0a26099bc51d717f5f154b37231ce7df3e98 +# +# Can be removed once we drop boost 1.72.0 support. 
+THREAD_DATA="boost/thread/pthread/thread_data.hpp" +if [ -f "${THREAD_DATA}" ]; then + sed -i \ + -e 's/^#if PTHREAD_STACK_MIN > 0$/#ifdef PTHREAD_STACK_MIN/' \ + "${THREAD_DATA}" +fi + +./bootstrap.sh --prefix=/stackable/boost +./b2 -j"$(nproc)" \ + link=shared runtime-link=shared variant=release threading=multi \ + install +rm -rf /tmp/* +EOF diff --git a/shared/boost/boil-config.toml b/shared/boost/boil-config.toml new file mode 100644 index 000000000..02a259508 --- /dev/null +++ b/shared/boost/boil-config.toml @@ -0,0 +1,17 @@ +[versions."1.72.0".local-images] +stackable-base = "1.0.0" + +[versions."1.72.0".build-arguments] +boost-version = "1.72.0" + +[versions."1.78.0".local-images] +stackable-base = "1.0.0" + +[versions."1.78.0".build-arguments] +boost-version = "1.78.0" + +[versions."1.86.0".local-images] +stackable-base = "1.0.0" + +[versions."1.86.0".build-arguments] +boost-version = "1.86.0" diff --git a/shared/protobuf/stackable/patches/3.21.12/patchable.toml b/shared/protobuf/stackable/patches/3.21.12/patchable.toml new file mode 100644 index 000000000..6b38d2fb4 --- /dev/null +++ b/shared/protobuf/stackable/patches/3.21.12/patchable.toml @@ -0,0 +1,2 @@ +mirror = "https://github.com/stackabletech/protobuf.git" +base = "f0dc78d7e6e331b8c6bb2d5283e06aa26883ca7c" diff --git a/shared/protobuf/stackable/patches/3.25.5/patchable.toml b/shared/protobuf/stackable/patches/3.25.5/patchable.toml new file mode 100644 index 000000000..c707e1755 --- /dev/null +++ b/shared/protobuf/stackable/patches/3.25.5/patchable.toml @@ -0,0 +1,2 @@ +mirror = "https://github.com/stackabletech/protobuf.git" +base = "9d0ec0f92b5b5fdeeda11f9dcecc1872ff378014" From 74a792eb8119a93d587a8300b436b6836f4338b5 Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Wed, 27 May 2026 15:48:23 +0200 Subject: [PATCH 02/11] revert accidental change --- rust/patchable/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/patchable/README.md
b/rust/patchable/README.md index 9bbcf310d..83ebc8064 100644 --- a/rust/patchable/README.md +++ b/rust/patchable/README.md @@ -200,7 +200,7 @@ cargo patchable checkout druid 26.0.0 cargo patchable init version druid 28.0.0 --upstream https://github.com/apache/druid.git --base druid-28.0.0 # Create and go to the worktree for the new version -pushd $(cargo patchable checkout hadoop/hadoop 3.5.0) +pushd $(cargo patchable checkout druid 28.0.0) # Cherry pick the old patch series git cherry-pick patchable/base/26.0.0..patchable/26.0.0 From ec494d33508736bc19b54bcb6c408b5aada17cbc Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Thu, 28 May 2026 07:41:15 +0200 Subject: [PATCH 03/11] Bump HDFS dependants 3.4.2 -> 3.4.3 --- druid/boil-config.toml | 4 +- hbase/boil-config.toml | 4 +- hbase/hbase-operator-tools/boil-config.toml | 4 +- hbase/hbase/boil-config.toml | 4 +- hbase/phoenix/boil-config.toml | 4 +- hive/boil-config.toml | 26 ++++----- .../boil-config.toml | 12 ++-- spark-k8s/boil-config.toml | 57 ++++++++++--------- spark-k8s/hbase-connectors/boil-config.toml | 4 +- 9 files changed, 57 insertions(+), 62 deletions(-) diff --git a/druid/boil-config.toml b/druid/boil-config.toml index 8910b6818..ba6da3a1d 100644 --- a/druid/boil-config.toml +++ b/druid/boil-config.toml @@ -15,7 +15,7 @@ authorizer-version = "0.7.0" # https://druid.apache.org/docs/34.0.0/operations/java/ java-base = "17" java-devel = "17" -"hadoop/hadoop" = "3.4.2" +"hadoop/hadoop" = "3.4.3" # Deprecated since 26.3 [versions."34.0.0".build-arguments] @@ -26,7 +26,7 @@ authorizer-version = "0.7.0" # https://druid.apache.org/docs/35.0.1/operations/java/ java-base = "21" java-devel = "21" -"hadoop/hadoop" = "3.4.2" +"hadoop/hadoop" = "3.4.3" [versions."35.0.1".build-arguments] authorizer-version = "0.7.0" diff --git a/hbase/boil-config.toml b/hbase/boil-config.toml index 66d1af206..2e5c06b00 100644 --- a/hbase/boil-config.toml +++ b/hbase/boil-config.toml @@ -6,7 +6,7 @@ "hbase/hbase-operator-tools" 
= "1.3.0-hbase2.6.3" "hbase/phoenix" = "5.2.1-hbase2.6.3" "hbase/hbase-opa-authorizer" = "0.3.0" -"hadoop/hadoop" = "3.4.2" +"hadoop/hadoop" = "3.4.3" java-base = "11" java-devel = "11" @@ -19,7 +19,7 @@ delete-caches = "true" "hbase/hbase-operator-tools" = "1.3.0-hbase2.6.4" "hbase/phoenix" = "5.3.0-hbase2.6.4" "hbase/hbase-opa-authorizer" = "0.3.0" -"hadoop/hadoop" = "3.4.2" +"hadoop/hadoop" = "3.4.3" java-base = "11" java-devel = "11" diff --git a/hbase/hbase-operator-tools/boil-config.toml b/hbase/hbase-operator-tools/boil-config.toml index ebde2a084..26d5b2381 100644 --- a/hbase/hbase-operator-tools/boil-config.toml +++ b/hbase/hbase-operator-tools/boil-config.toml @@ -1,5 +1,5 @@ [versions."1.3.0-hbase2.6.3".local-images] -"hadoop/hadoop" = "3.4.2" +"hadoop/hadoop" = "3.4.3" "hbase/hbase" = "2.6.3" "java-devel" = "11" @@ -11,7 +11,7 @@ delete-caches = "true" # Note: The next Hbase version should use 1.4.0 if it is released. # At the time of writing, the latest commit is b22878f. [versions."1.3.0-hbase2.6.4".local-images] -"hadoop/hadoop" = "3.4.2" +"hadoop/hadoop" = "3.4.3" "hbase/hbase" = "2.6.4" "java-devel" = "11" diff --git a/hbase/hbase/boil-config.toml b/hbase/hbase/boil-config.toml index 85a715450..af8073a05 100644 --- a/hbase/hbase/boil-config.toml +++ b/hbase/hbase/boil-config.toml @@ -1,5 +1,5 @@ [versions."2.6.3".local-images] -"hadoop/hadoop" = "3.4.2" +"hadoop/hadoop" = "3.4.3" java-base = "11" java-devel = "11" @@ -8,7 +8,7 @@ async-profiler-version = "2.9" delete-caches = "true" [versions."2.6.4".local-images] -"hadoop/hadoop" = "3.4.2" +"hadoop/hadoop" = "3.4.3" java-base = "11" java-devel = "11" diff --git a/hbase/phoenix/boil-config.toml b/hbase/phoenix/boil-config.toml index 3470920fc..76ca067c3 100644 --- a/hbase/phoenix/boil-config.toml +++ b/hbase/phoenix/boil-config.toml @@ -1,6 +1,6 @@ [versions."5.2.1-hbase2.6.3".local-images] "hbase/hbase" = "2.6.3" -"hadoop/hadoop" = "3.4.2" +"hadoop/hadoop" = "3.4.3" java-devel = "11" 
[versions."5.2.1-hbase2.6.3".build-arguments] @@ -10,7 +10,7 @@ delete-caches = "true" [versions."5.3.0-hbase2.6.4".local-images] "hbase/hbase" = "2.6.4" -"hadoop/hadoop" = "3.4.2" +"hadoop/hadoop" = "3.4.3" java-devel = "11" [versions."5.3.0-hbase2.6.4".build-arguments] diff --git a/hive/boil-config.toml b/hive/boil-config.toml index 6731797dc..fef8d2fd5 100644 --- a/hive/boil-config.toml +++ b/hive/boil-config.toml @@ -50,32 +50,26 @@ azure-keyvault-core-version = "1.0.0" # Hive 4.1 requires Java 17 (according to GitHub README) java-base = "17" java-devel = "17" -"hadoop/hadoop" = "3.4.2" +"hadoop/hadoop" = "3.4.3" # hive-metastore-opa-authorizer from: https://github.com/boschglobal/hive-metastore-opa-authorizer -"hive/hive-metastore-opa-authorizer" = "v1.0.0-hive-4.1.0-hadoop-3.4.2" +"hive/hive-metastore-opa-authorizer" = "v1.0.0-hive-4.1.0-hadoop-3.4.3" [versions."4.1.0".build-arguments] jmx-exporter-version = "1.3.0" -# Keep consistent with the dependency from hadoop-aws: https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-aws/3.4.2 -aws-java-sdk-bundle-version = "2.29.52" -# Keep consistent with the dependency from hadoop-azure: https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-azure/3.4.2 -azure-storage-version = "7.0.1" -# Keep consistent with the dependency from azure-storage: https://mvnrepository.com/artifact/com.microsoft.azure/azure-storage/7.0.1 -azure-keyvault-core-version = "1.0.0" +aws-java-sdk-bundle-version = "2.35.4" # Needs to match https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-aws/3.4.3 +azure-storage-version = "7.0.1" # Needs to match https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-azure/3.4.3 +azure-keyvault-core-version = "1.0.0" # Needs to match https://mvnrepository.com/artifact/com.microsoft.azure/azure-storage/7.0.1 [versions."4.2.0".local-images] # Hive 4.2 requires Java 21 (according to GitHub README) java-base = "21" java-devel = "21" -"hadoop/hadoop" = "3.4.2" +"hadoop/hadoop" = "3.4.3" # 
hive-metastore-opa-authorizer from: https://github.com/boschglobal/hive-metastore-opa-authorizer -"hive/hive-metastore-opa-authorizer" = "v1.0.0-hive-4.2.0-hadoop-3.4.2" +"hive/hive-metastore-opa-authorizer" = "v1.0.0-hive-4.2.0-hadoop-3.4.3" [versions."4.2.0".build-arguments] jmx-exporter-version = "1.3.0" -# Keep consistent with the dependency from hadoop-aws: https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-aws/3.4.2 -aws-java-sdk-bundle-version = "2.29.52" -# Keep consistent with the dependency from hadoop-azure: https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-azure/3.4.2 -azure-storage-version = "7.0.1" -# Keep consistent with the dependency from azure-storage: https://mvnrepository.com/artifact/com.microsoft.azure/azure-storage/7.0.1 -azure-keyvault-core-version = "1.0.0" +aws-java-sdk-bundle-version = "2.35.4" # Needs to match https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-aws/3.4.3 +azure-storage-version = "7.0.1" # Needs to match https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-azure/3.4.3 +azure-keyvault-core-version = "1.0.0" # Needs to match https://mvnrepository.com/artifact/com.microsoft.azure/azure-storage/7.0.1 diff --git a/hive/hive-metastore-opa-authorizer/boil-config.toml b/hive/hive-metastore-opa-authorizer/boil-config.toml index a2eb55fcd..2808be73d 100644 --- a/hive/hive-metastore-opa-authorizer/boil-config.toml +++ b/hive/hive-metastore-opa-authorizer/boil-config.toml @@ -25,20 +25,20 @@ authorizer-version = "v1.0.0" hive-version = "4.0.1" delete-caches = "true" -[versions."v1.0.0-hive-4.1.0-hadoop-3.4.2".local-images] +[versions."v1.0.0-hive-4.1.0-hadoop-3.4.3".local-images] "java-devel" = "17" -"hadoop/hadoop" = "3.4.2" +"hadoop/hadoop" = "3.4.3" -[versions."v1.0.0-hive-4.1.0-hadoop-3.4.2".build-arguments] +[versions."v1.0.0-hive-4.1.0-hadoop-3.4.3".build-arguments] authorizer-version = "v1.0.0" hive-version = "4.1.0" delete-caches = "true" 
-[versions."v1.0.0-hive-4.2.0-hadoop-3.4.2".local-images] +[versions."v1.0.0-hive-4.2.0-hadoop-3.4.3".local-images] "java-devel" = "21" -"hadoop/hadoop" = "3.4.2" +"hadoop/hadoop" = "3.4.3" -[versions."v1.0.0-hive-4.2.0-hadoop-3.4.2".build-arguments] +[versions."v1.0.0-hive-4.2.0-hadoop-3.4.3".build-arguments] authorizer-version = "v1.0.0" hive-version = "4.2.0" delete-caches = "true" diff --git a/spark-k8s/boil-config.toml b/spark-k8s/boil-config.toml index 1ebde375c..dfe1fa992 100644 --- a/spark-k8s/boil-config.toml +++ b/spark-k8s/boil-config.toml @@ -5,7 +5,7 @@ containerfile = "Dockerfile.3" [versions."3.5.7".local-images] -"hadoop/hadoop" = "3.4.2" +"hadoop/hadoop" = "3.4.3" java-base = "17" java-devel = "17" hbase = "2.6.3" @@ -13,12 +13,13 @@ hbase = "2.6.3" [versions."3.5.7".build-arguments] python-version = "3.11" -aws-java-sdk-bundle-version = "2.29.52" # needs to match the version shipped by Hadoop -azure-storage-version = "7.0.1" # needs to match the version shipped by Hadoop -azure-keyvault-core-version = "1.0.0" # needs to match the version shipped by Hadoop -jackson-dataformat-xml-version = "2.15.2" # needs to match the version shipped by Spark https://mvnrepository.com/artifact/org.apache.spark/spark-core_2.12/3.5.7 -stax2-api-version = "4.2.1" # needs to match the jackson version https://mvnrepository.com/artifact/com.fasterxml.jackson.dataformat/jackson-dataformat-xml/2.15.2 -woodstox-core-version = "6.5.1" # needs to match the jackson version https://mvnrepository.com/artifact/com.fasterxml.jackson.dataformat/jackson-dataformat-xml/2.15.2 + +aws-java-sdk-bundle-version = "2.35.4" # Needs to match https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-aws/3.4.3 +azure-storage-version = "7.0.1" # Needs to match https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-azure/3.4.3 +azure-keyvault-core-version = "1.0.0" # Needs to match https://mvnrepository.com/artifact/com.microsoft.azure/azure-storage/7.0.1 +jackson-dataformat-xml-version 
= "2.15.2" # Needs to match https://mvnrepository.com/artifact/org.apache.spark/spark-core_2.12/3.5.7 +stax2-api-version = "4.2.1" # Needs to match https://mvnrepository.com/artifact/com.fasterxml.jackson.dataformat/jackson-dataformat-xml/2.15.2 +woodstox-core-version = "6.5.1" # Needs to match https://mvnrepository.com/artifact/com.fasterxml.jackson.dataformat/jackson-dataformat-xml/2.15.2 jmx-exporter-version = "1.3.0" tini-version = "0.19.0" hbase-connector-version = "1.0.1_3.5.7" @@ -27,7 +28,7 @@ hbase-connector-version = "1.0.1_3.5.7" containerfile = "Dockerfile.3" [versions."3.5.8".local-images] -"hadoop/hadoop" = "3.4.2" +"hadoop/hadoop" = "3.4.3" java-base = "17" java-devel = "17" hbase = "2.6.3" @@ -35,12 +36,12 @@ hbase = "2.6.3" [versions."3.5.8".build-arguments] python-version = "3.11" -aws-java-sdk-bundle-version = "2.29.52" # needs to match the version shipped by Hadoop -azure-storage-version = "7.0.1" # needs to match the version shipped by Hadoop -azure-keyvault-core-version = "1.0.0" # needs to match the version shipped by Hadoop -jackson-dataformat-xml-version = "2.15.2" # needs to match the version shipped by Spark https://mvnrepository.com/artifact/org.apache.spark/spark-core_2.12/3.5.7 -stax2-api-version = "4.2.1" # needs to match the jackson version https://mvnrepository.com/artifact/com.fasterxml.jackson.dataformat/jackson-dataformat-xml/2.15.2 -woodstox-core-version = "6.5.1" # needs to match the jackson version https://mvnrepository.com/artifact/com.fasterxml.jackson.dataformat/jackson-dataformat-xml/2.15.2 +aws-java-sdk-bundle-version = "2.35.4" # Needs to match https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-aws/3.4.3 +azure-storage-version = "7.0.1" # Needs to match https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-azure/3.4.3 +azure-keyvault-core-version = "1.0.0" # Needs to match https://mvnrepository.com/artifact/com.microsoft.azure/azure-storage/7.0.1 +jackson-dataformat-xml-version = "2.15.2" # Needs to match 
https://mvnrepository.com/artifact/org.apache.spark/spark-core_2.12/3.5.7 +stax2-api-version = "4.2.1" # Needs to match https://mvnrepository.com/artifact/com.fasterxml.jackson.dataformat/jackson-dataformat-xml/2.15.2 +woodstox-core-version = "6.5.1" # Needs to match https://mvnrepository.com/artifact/com.fasterxml.jackson.dataformat/jackson-dataformat-xml/2.15.2 jmx-exporter-version = "1.3.0" tini-version = "0.19.0" hbase-connector-version = "1.0.1_3.5.8" @@ -49,18 +50,18 @@ hbase-connector-version = "1.0.1_3.5.8" containerfile = "Dockerfile.4" [versions."4.0.1".local-images] -"hadoop/hadoop" = "3.4.2" +"hadoop/hadoop" = "3.4.3" java-base = "21" java-devel = "21" [versions."4.0.1".build-arguments] python-version = "3.12" -aws-java-sdk-bundle-version = "2.29.52" # needs to match the version shipped by Hadoop -azure-storage-version = "7.0.1" # needs to match the version shipped by Hadoop -azure-keyvault-core-version = "1.0.0" # needs to match the version shipped by Hadoop -jackson-dataformat-xml-version = "2.18.2" # needs to match the version shipped by Spark https://mvnrepository.com/artifact/org.apache.spark/spark-core_2.13/4.0.1 -stax2-api-version = "4.2.2" # needs to match the jackson version https://mvnrepository.com/artifact/com.fasterxml.jackson.dataformat/jackson-dataformat-xml/2.18.2 -woodstox-core-version = "7.0.0" # needs to match the jackson version https://mvnrepository.com/artifact/com.fasterxml.jackson.dataformat/jackson-dataformat-xml/2.18.2 +aws-java-sdk-bundle-version = "2.35.4" # Needs to match https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-aws/3.4.3 +azure-storage-version = "7.0.1" # Needs to match https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-azure/3.4.3 +azure-keyvault-core-version = "1.0.0" # Needs to match https://mvnrepository.com/artifact/com.microsoft.azure/azure-storage/7.0.1 +jackson-dataformat-xml-version = "2.15.2" # Needs to match https://mvnrepository.com/artifact/org.apache.spark/spark-core_2.12/3.5.7 
+stax2-api-version = "4.2.1" # Needs to match https://mvnrepository.com/artifact/com.fasterxml.jackson.dataformat/jackson-dataformat-xml/2.15.2 +woodstox-core-version = "6.5.1" # Needs to match https://mvnrepository.com/artifact/com.fasterxml.jackson.dataformat/jackson-dataformat-xml/2.15.2 jmx-exporter-version = "1.3.0" tini-version = "0.19.0" @@ -68,17 +69,17 @@ tini-version = "0.19.0" containerfile = "Dockerfile.4" [versions."4.1.1".local-images] -"hadoop/hadoop" = "3.4.2" +"hadoop/hadoop" = "3.4.3" java-base = "21" java-devel = "21" [versions."4.1.1".build-arguments] python-version = "3.12" -aws-java-sdk-bundle-version = "2.29.52" # needs to match the version shipped by Hadoop -azure-storage-version = "7.0.1" # needs to match the version shipped by Hadoop -azure-keyvault-core-version = "1.0.0" # needs to match the version shipped by Hadoop -jackson-dataformat-xml-version = "2.20.0" # needs to match the version shipped by Spark https://mvnrepository.com/artifact/org.apache.spark/spark-core_2.13/4.1.1/dependencies -stax2-api-version = "4.2.2" # needs to match the jackson version https://mvnrepository.com/artifact/com.fasterxml.jackson.dataformat/jackson-dataformat-xml/2.20.0/dependencies -woodstox-core-version = "7.1.1" # needs to match the jackson version https://mvnrepository.com/artifact/com.fasterxml.jackson.dataformat/jackson-dataformat-xml/2.20.0/dependencies +aws-java-sdk-bundle-version = "2.35.4" # Needs to match https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-aws/3.4.3 +azure-storage-version = "7.0.1" # Needs to match https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-azure/3.4.3 +azure-keyvault-core-version = "1.0.0" # Needs to match https://mvnrepository.com/artifact/com.microsoft.azure/azure-storage/7.0.1 +jackson-dataformat-xml-version = "2.15.2" # Needs to match https://mvnrepository.com/artifact/org.apache.spark/spark-core_2.12/3.5.7 +stax2-api-version = "4.2.1" # Needs to match 
https://mvnrepository.com/artifact/com.fasterxml.jackson.dataformat/jackson-dataformat-xml/2.15.2 +woodstox-core-version = "6.5.1" # Needs to match https://mvnrepository.com/artifact/com.fasterxml.jackson.dataformat/jackson-dataformat-xml/2.15.2 jmx-exporter-version = "1.3.0" tini-version = "0.19.0" diff --git a/spark-k8s/hbase-connectors/boil-config.toml b/spark-k8s/hbase-connectors/boil-config.toml index 6cf4c0791..d661b5906 100644 --- a/spark-k8s/hbase-connectors/boil-config.toml +++ b/spark-k8s/hbase-connectors/boil-config.toml @@ -4,7 +4,7 @@ java-devel = "17" [versions."1.0.1_3.5.7".build-arguments] spark-version = "3.5.7" -hadoop-version = "3.4.2" +hadoop-version = "3.4.3" hbase-version = "2.6.3" [versions."1.0.1_3.5.8".local-images] @@ -12,5 +12,5 @@ java-devel = "17" [versions."1.0.1_3.5.8".build-arguments] spark-version = "3.5.8" -hadoop-version = "3.4.2" +hadoop-version = "3.4.3" hbase-version = "2.6.3" From 61741f635ee0bd769e59056e81c472c762462d83 Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Thu, 28 May 2026 07:45:54 +0200 Subject: [PATCH 04/11] Fix leftover EOF --- shared/boost/Dockerfile | 3 --- 1 file changed, 3 deletions(-) diff --git a/shared/boost/Dockerfile b/shared/boost/Dockerfile index 7c517e660..a918042ab 100644 --- a/shared/boost/Dockerfile +++ b/shared/boost/Dockerfile @@ -23,9 +23,6 @@ RUN < Date: Thu, 28 May 2026 07:50:24 +0200 Subject: [PATCH 05/11] Fix hadolint --- hadoop/hadoop/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop/hadoop/Dockerfile b/hadoop/hadoop/Dockerfile index f5b8877bc..34adea3a7 100644 --- a/hadoop/hadoop/Dockerfile +++ b/hadoop/hadoop/Dockerfile @@ -50,7 +50,7 @@ RUN < 3.21 bundles abseil-cpp and utf8_range as git submodules git submodule update --init --recursive cmake -S . 
-B build -DCMAKE_POSITION_INDEPENDENT_CODE=ON -Dprotobuf_BUILD_TESTS=OFF - cmake --build build --parallel $(nproc) + cmake --build build --parallel "$(nproc)" cmake --install build --prefix /opt/protobuf fi (cd .. && rm -r ${PROTOBUF_VERSION}) From af0a4eb503907d1ce21bec0f2a38dbb858aa4bf0 Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Thu, 28 May 2026 14:53:03 +0200 Subject: [PATCH 06/11] WIP: Move s3a and abfs jar collection into hadoop builder image --- hadoop/hadoop/Dockerfile | 29 +++++++++++++++++++++ hadoop/hadoop/boil-config.toml | 14 ++++++++++ hbase/Dockerfile | 11 ++------ hive/Dockerfile | 47 +++++++++++----------------------- hive/boil-config.toml | 18 ------------- spark-k8s/Dockerfile.3 | 17 ++---------- spark-k8s/Dockerfile.4 | 17 ++---------- spark-k8s/boil-config.toml | 12 --------- 8 files changed, 64 insertions(+), 101 deletions(-) diff --git a/hadoop/hadoop/Dockerfile b/hadoop/hadoop/Dockerfile index 34adea3a7..62c139bbe 100644 --- a/hadoop/hadoop/Dockerfile +++ b/hadoop/hadoop/Dockerfile @@ -7,6 +7,9 @@ FROM local-image/java-devel AS hadoop-builder ARG PRODUCT_VERSION ARG RELEASE_VERSION ARG PROTOBUF_VERSION +ARG AWS_JAVA_SDK_BUNDLE_VERSION +ARG AZURE_STORAGE_VERSION +ARG AZURE_KEYVAULT_CORE_VERSION ARG STACKABLE_USER_UID WORKDIR /stackable @@ -90,6 +93,7 @@ tar -czf /stackable/hadoop-${NEW_VERSION}-src.tar.gz . # Therefore, this build does work but the final image does NOT contain the openssl-devel package which is why it fails there which is why we have to create the symlink over there manually. # We still leave this flag in to automatically fail should anything with the packages or symlinks ever fail. 
mvn \ + --threads 1C \ clean package install \ -Pdist,native \ -pl '!hadoop-tools/hadoop-pipes' \ @@ -140,3 +144,28 @@ rm -rf /stackable/.m2 # Set correct groups; make sure only required artifacts for the final image are located in /stackable chmod -R g=u /stackable EOF + +RUN < below cp /stackable/patched-libs/maven/org/apache/hadoop/hadoop-mapreduce-client-core/${HADOOP_VERSION}-stackable${RELEASE_VERSION}/hadoop-mapreduce-client-core-${HADOOP_VERSION}-stackable${RELEASE_VERSION}.jar /stackable/apache-hive-metastore-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}-bin/lib/ -# The next two sections for S3 and Azure use hardcoded version numbers on purpose instead of wildcards -# This way the build will fail should one of the files not be available anymore in a later Hadoop version! - -# Add S3 Support for Hive (support for s3a://) -cp /stackable/hadoop-${HADOOP_VERSION}-stackable${RELEASE_VERSION}/share/hadoop/tools/lib/hadoop-aws-${HADOOP_VERSION}-stackable${RELEASE_VERSION}.jar /stackable/apache-hive-metastore-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}-bin/lib/ - -# According to https://hadoop.apache.org/docs/stable/hadoop-aws/tools/hadoop-aws/aws_sdk_upgrade.html, the jar filename has changed from -# aws-java-sdk-bundle-${AWS_JAVA_SDK_BUNDLE_VERSION}.jar to bundle-${AWS_JAVA_SDK_BUNDLE_VERSION}.jar. 
In future, you might need to do: -if [[ "${PRODUCT_VERSION}" == "3.1.3" || "${PRODUCT_VERSION}" == 4.0.* ]]; then -cp /stackable/hadoop-${HADOOP_VERSION}-stackable${RELEASE_VERSION}/share/hadoop/tools/lib/aws-java-sdk-bundle-${AWS_JAVA_SDK_BUNDLE_VERSION}.jar /stackable/apache-hive-metastore-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}-bin/lib/ -else -cp /stackable/hadoop-${HADOOP_VERSION}-stackable${RELEASE_VERSION}/share/hadoop/tools/lib/bundle-${AWS_JAVA_SDK_BUNDLE_VERSION}.jar /stackable/apache-hive-metastore-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}-bin/lib/ -fi - -# Add Azure ABFS support (support for abfs://) -cp /stackable/hadoop-${HADOOP_VERSION}-stackable${RELEASE_VERSION}/share/hadoop/tools/lib/hadoop-azure-${HADOOP_VERSION}-stackable${RELEASE_VERSION}.jar /stackable/apache-hive-metastore-${NEW_VERSION}-bin/lib/ -cp /stackable/hadoop-${HADOOP_VERSION}-stackable${RELEASE_VERSION}/share/hadoop/tools/lib/azure-storage-${AZURE_STORAGE_VERSION}.jar /stackable/apache-hive-metastore-${NEW_VERSION}-bin/lib/ -cp /stackable/hadoop-${HADOOP_VERSION}-stackable${RELEASE_VERSION}/share/hadoop/tools/lib/azure-keyvault-core-${AZURE_KEYVAULT_CORE_VERSION}.jar /stackable/apache-hive-metastore-${NEW_VERSION}-bin/lib/ - # We're removing these to make the intermediate layer smaller # This can be necessary even though it's only a builder image because the GitHub Action Runners only have very limited space available # and we are sometimes running into errors because we're out of space. 
diff --git a/hive/boil-config.toml b/hive/boil-config.toml index fef8d2fd5..b2694f47b 100644 --- a/hive/boil-config.toml +++ b/hive/boil-config.toml @@ -11,10 +11,6 @@ java-devel = "8" [versions."3.1.3".build-arguments] jmx-exporter-version = "1.3.0" -# Keep consistent with the dependency from Hadoop: https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-aws/3.3.6 -aws-java-sdk-bundle-version = "1.12.367" -azure-storage-version = "7.0.1" -azure-keyvault-core-version = "1.0.0" [versions."4.0.0".local-images] # Hive 4 must be built with Java 8 (according to GitHub README) but seems to run on Java 11 @@ -26,10 +22,6 @@ java-devel = "8" [versions."4.0.0".build-arguments] jmx-exporter-version = "1.3.0" -# Keep consistent with the dependency from Hadoop: https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-aws/3.3.6 -aws-java-sdk-bundle-version = "1.12.367" -azure-storage-version = "7.0.1" -azure-keyvault-core-version = "1.0.0" [versions."4.0.1".local-images] # Hive 4.0 must be built with Java 8 (according to GitHub README) but seems to run on Java 11 @@ -41,10 +33,6 @@ java-devel = "8" [versions."4.0.1".build-arguments] jmx-exporter-version = "1.3.0" -# Keep consistent with the dependency from Hadoop: https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-aws/3.3.6 -aws-java-sdk-bundle-version = "1.12.367" -azure-storage-version = "7.0.1" -azure-keyvault-core-version = "1.0.0" [versions."4.1.0".local-images] # Hive 4.1 requires Java 17 (according to GitHub README) @@ -56,9 +44,6 @@ java-devel = "17" [versions."4.1.0".build-arguments] jmx-exporter-version = "1.3.0" -aws-java-sdk-bundle-version = "2.35.4" # Needs to match https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-aws/3.4.3 -azure-storage-version = "7.0.1" # Needs to match https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-azure/3.4.3 -azure-keyvault-core-version = "1.0.0" # Needs to match https://mvnrepository.com/artifact/com.microsoft.azure/azure-storage/7.0.1 
[versions."4.2.0".local-images] # Hive 4.2 requires Java 21 (according to GitHub README) @@ -70,6 +55,3 @@ java-devel = "21" [versions."4.2.0".build-arguments] jmx-exporter-version = "1.3.0" -aws-java-sdk-bundle-version = "2.35.4" # Needs to match https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-aws/3.4.3 -azure-storage-version = "7.0.1" # Needs to match https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-azure/3.4.3 -azure-keyvault-core-version = "1.0.0" # Needs to match https://mvnrepository.com/artifact/com.microsoft.azure/azure-storage/7.0.1 diff --git a/spark-k8s/Dockerfile.3 b/spark-k8s/Dockerfile.3 index 71a786516..183a79ae9 100644 --- a/spark-k8s/Dockerfile.3 +++ b/spark-k8s/Dockerfile.3 @@ -44,9 +44,6 @@ ARG HADOOP_HADOOP_VERSION # Reassign the arg to `HADOOP_VERSION` for better readability. ENV HADOOP_VERSION=${HADOOP_HADOOP_VERSION} ARG HBASE_VERSION -ARG AWS_JAVA_SDK_BUNDLE_VERSION -ARG AZURE_STORAGE_VERSION -ARG AZURE_KEYVAULT_CORE_VERSION ARG JACKSON_DATAFORMAT_XML_VERSION ARG STAX2_API_VERSION ARG WOODSTOX_CORE_VERSION @@ -108,18 +105,8 @@ EOF WORKDIR /stackable/spark-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}/dist/jars -# Copy modules required for s3a:// -COPY --from=hadoop-builder --chown=${STACKABLE_USER_UID}:0 \ - /stackable/hadoop/share/hadoop/tools/lib/hadoop-aws-${HADOOP_VERSION}-stackable${RELEASE_VERSION}.jar \ - /stackable/hadoop/share/hadoop/tools/lib/bundle-${AWS_JAVA_SDK_BUNDLE_VERSION}.jar \ - ./ - -# Copy modules required for abfs:// -COPY --from=hadoop-builder --chown=${STACKABLE_USER_UID}:0 \ - /stackable/hadoop/share/hadoop/tools/lib/hadoop-azure-${HADOOP_VERSION}-stackable${RELEASE_VERSION}.jar \ - /stackable/hadoop/share/hadoop/tools/lib/azure-storage-${AZURE_STORAGE_VERSION}.jar \ - /stackable/hadoop/share/hadoop/tools/lib/azure-keyvault-core-${AZURE_KEYVAULT_CORE_VERSION}.jar \ - ./ +# Copy cloud libraries for s3a:// and abfs:// support +COPY --chown=${STACKABLE_USER_UID}:0 --from=hadoop-builder 
/stackable/hadoop-cloud-libraries/*.jar ./ # Copy the HBase connector including required modules COPY --from=hbase-connectors-builder --chown=${STACKABLE_USER_UID}:0 \ diff --git a/spark-k8s/Dockerfile.4 b/spark-k8s/Dockerfile.4 index bc61acdaf..ba6e40a3c 100644 --- a/spark-k8s/Dockerfile.4 +++ b/spark-k8s/Dockerfile.4 @@ -36,9 +36,6 @@ ARG PRODUCT_VERSION ARG HADOOP_HADOOP_VERSION # Reassign the arg to `HADOOP_VERSION` for better readability. ENV HADOOP_VERSION=${HADOOP_HADOOP_VERSION} -ARG AWS_JAVA_SDK_BUNDLE_VERSION -ARG AZURE_STORAGE_VERSION -ARG AZURE_KEYVAULT_CORE_VERSION ARG JACKSON_DATAFORMAT_XML_VERSION ARG STAX2_API_VERSION ARG WOODSTOX_CORE_VERSION @@ -104,18 +101,8 @@ EOF WORKDIR /stackable/spark-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}/dist/jars -# Copy modules required for s3a:// -COPY --from=hadoop-builder --chown=${STACKABLE_USER_UID}:0 \ - /stackable/hadoop/share/hadoop/tools/lib/hadoop-aws-${HADOOP_VERSION}-stackable${RELEASE_VERSION}.jar \ - /stackable/hadoop/share/hadoop/tools/lib/bundle-${AWS_JAVA_SDK_BUNDLE_VERSION}.jar \ - ./ - -# Copy modules required for abfs:// -COPY --from=hadoop-builder --chown=${STACKABLE_USER_UID}:0 \ - /stackable/hadoop/share/hadoop/tools/lib/hadoop-azure-${HADOOP_VERSION}-stackable${RELEASE_VERSION}.jar \ - /stackable/hadoop/share/hadoop/tools/lib/azure-storage-${AZURE_STORAGE_VERSION}.jar \ - /stackable/hadoop/share/hadoop/tools/lib/azure-keyvault-core-${AZURE_KEYVAULT_CORE_VERSION}.jar \ - ./ +# Copy cloud libraries for s3a:// and abfs:// support +COPY --chown=${STACKABLE_USER_UID}:0 --from=hadoop-builder /stackable/hadoop-cloud-libraries/*.jar ./ COPY spark-k8s/stackable/jmx /stackable/jmx diff --git a/spark-k8s/boil-config.toml b/spark-k8s/boil-config.toml index dfe1fa992..c724bcf9b 100644 --- a/spark-k8s/boil-config.toml +++ b/spark-k8s/boil-config.toml @@ -14,9 +14,6 @@ hbase = "2.6.3" [versions."3.5.7".build-arguments] python-version = "3.11" -aws-java-sdk-bundle-version = "2.35.4" # Needs to match 
https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-aws/3.4.3 -azure-storage-version = "7.0.1" # Needs to match https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-azure/3.4.3 -azure-keyvault-core-version = "1.0.0" # Needs to match https://mvnrepository.com/artifact/com.microsoft.azure/azure-storage/7.0.1 jackson-dataformat-xml-version = "2.15.2" # Needs to match https://mvnrepository.com/artifact/org.apache.spark/spark-core_2.12/3.5.7 stax2-api-version = "4.2.1" # Needs to match https://mvnrepository.com/artifact/com.fasterxml.jackson.dataformat/jackson-dataformat-xml/2.15.2 woodstox-core-version = "6.5.1" # Needs to match https://mvnrepository.com/artifact/com.fasterxml.jackson.dataformat/jackson-dataformat-xml/2.15.2 @@ -36,9 +33,6 @@ hbase = "2.6.3" [versions."3.5.8".build-arguments] python-version = "3.11" -aws-java-sdk-bundle-version = "2.35.4" # Needs to match https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-aws/3.4.3 -azure-storage-version = "7.0.1" # Needs to match https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-azure/3.4.3 -azure-keyvault-core-version = "1.0.0" # Needs to match https://mvnrepository.com/artifact/com.microsoft.azure/azure-storage/7.0.1 jackson-dataformat-xml-version = "2.15.2" # Needs to match https://mvnrepository.com/artifact/org.apache.spark/spark-core_2.12/3.5.7 stax2-api-version = "4.2.1" # Needs to match https://mvnrepository.com/artifact/com.fasterxml.jackson.dataformat/jackson-dataformat-xml/2.15.2 woodstox-core-version = "6.5.1" # Needs to match https://mvnrepository.com/artifact/com.fasterxml.jackson.dataformat/jackson-dataformat-xml/2.15.2 @@ -56,9 +50,6 @@ java-devel = "21" [versions."4.0.1".build-arguments] python-version = "3.12" -aws-java-sdk-bundle-version = "2.35.4" # Needs to match https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-aws/3.4.3 -azure-storage-version = "7.0.1" # Needs to match https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-azure/3.4.3 
-azure-keyvault-core-version = "1.0.0" # Needs to match https://mvnrepository.com/artifact/com.microsoft.azure/azure-storage/7.0.1 jackson-dataformat-xml-version = "2.15.2" # Needs to match https://mvnrepository.com/artifact/org.apache.spark/spark-core_2.12/3.5.7 stax2-api-version = "4.2.1" # Needs to match https://mvnrepository.com/artifact/com.fasterxml.jackson.dataformat/jackson-dataformat-xml/2.15.2 woodstox-core-version = "6.5.1" # Needs to match https://mvnrepository.com/artifact/com.fasterxml.jackson.dataformat/jackson-dataformat-xml/2.15.2 @@ -75,9 +66,6 @@ java-devel = "21" [versions."4.1.1".build-arguments] python-version = "3.12" -aws-java-sdk-bundle-version = "2.35.4" # Needs to match https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-aws/3.4.3 -azure-storage-version = "7.0.1" # Needs to match https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-azure/3.4.3 -azure-keyvault-core-version = "1.0.0" # Needs to match https://mvnrepository.com/artifact/com.microsoft.azure/azure-storage/7.0.1 jackson-dataformat-xml-version = "2.15.2" # Needs to match https://mvnrepository.com/artifact/org.apache.spark/spark-core_2.12/3.5.7 stax2-api-version = "4.2.1" # Needs to match https://mvnrepository.com/artifact/com.fasterxml.jackson.dataformat/jackson-dataformat-xml/2.15.2 woodstox-core-version = "6.5.1" # Needs to match https://mvnrepository.com/artifact/com.fasterxml.jackson.dataformat/jackson-dataformat-xml/2.15.2 From bda9e9cf7b913d673835881f1ab1a1483fb2c43a Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Thu, 28 May 2026 15:34:29 +0200 Subject: [PATCH 07/11] Fixup aws-sdk file name --- hadoop/hadoop/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop/hadoop/Dockerfile b/hadoop/hadoop/Dockerfile index 62c139bbe..c485d3f1e 100644 --- a/hadoop/hadoop/Dockerfile +++ b/hadoop/hadoop/Dockerfile @@ -154,7 +154,7 @@ mkdir -p /stackable/hadoop-cloud-libraries/ if [[ "$PRODUCT_VERSION" == "3.3.6" || "$PRODUCT_VERSION" 
== "3.4.2" ]]; then cp /stackable/hadoop/share/hadoop/tools/lib/hadoop-aws-${NEW_VERSION}.jar /stackable/hadoop-cloud-libraries/ - cp /stackable/hadoop/share/hadoop/tools/lib/aws-java-sdk-bundle-${NEW_VERSION}.jar /stackable/hadoop-cloud-libraries/ + cp /stackable/hadoop/share/hadoop/tools/lib/bundle-${AWS_JAVA_SDK_BUNDLE_VERSION}.jar /stackable/hadoop-cloud-libraries/aws-java-sdk-bundle-${AWS_JAVA_SDK_BUNDLE_VERSION}.jar cp /stackable/hadoop/share/hadoop/tools/lib/hadoop-azure-${NEW_VERSION}.jar /stackable/hadoop-cloud-libraries/ cp /stackable/hadoop/share/hadoop/tools/lib/azure-storage-${AZURE_STORAGE_VERSION}.jar /stackable/hadoop-cloud-libraries/ cp /stackable/hadoop/share/hadoop/tools/lib/azure-keyvault-core-${AZURE_KEYVAULT_CORE_VERSION}.jar /stackable/hadoop-cloud-libraries/ From 7499b67b4104e1508b23669438b404e4b5637c87 Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Thu, 28 May 2026 15:37:20 +0200 Subject: [PATCH 08/11] Add note --- hadoop/hadoop/Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/hadoop/hadoop/Dockerfile b/hadoop/hadoop/Dockerfile index c485d3f1e..8bddd7bd6 100644 --- a/hadoop/hadoop/Dockerfile +++ b/hadoop/hadoop/Dockerfile @@ -154,6 +154,7 @@ mkdir -p /stackable/hadoop-cloud-libraries/ if [[ "$PRODUCT_VERSION" == "3.3.6" || "$PRODUCT_VERSION" == "3.4.2" ]]; then cp /stackable/hadoop/share/hadoop/tools/lib/hadoop-aws-${NEW_VERSION}.jar /stackable/hadoop-cloud-libraries/ + # For some reason it was renamed from "aws-java-sdk-bundle" to "bundle" cp /stackable/hadoop/share/hadoop/tools/lib/bundle-${AWS_JAVA_SDK_BUNDLE_VERSION}.jar /stackable/hadoop-cloud-libraries/aws-java-sdk-bundle-${AWS_JAVA_SDK_BUNDLE_VERSION}.jar cp /stackable/hadoop/share/hadoop/tools/lib/hadoop-azure-${NEW_VERSION}.jar /stackable/hadoop-cloud-libraries/ cp /stackable/hadoop/share/hadoop/tools/lib/azure-storage-${AZURE_STORAGE_VERSION}.jar /stackable/hadoop-cloud-libraries/ From cc8731ab6a12a7abf9192a7f7fd6b32a7b2ed1e4 Mon Sep 17 00:00:00 2001 From: 
Sebastian Bernauer Date: Fri, 29 May 2026 07:30:05 +0200 Subject: [PATCH 09/11] Switch to Nexus (as it now works again) --- hadoop/hadoop/Dockerfile | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/hadoop/hadoop/Dockerfile b/hadoop/hadoop/Dockerfile index 8bddd7bd6..5a4ccebbd 100644 --- a/hadoop/hadoop/Dockerfile +++ b/hadoop/hadoop/Dockerfile @@ -162,9 +162,7 @@ if [[ "$PRODUCT_VERSION" == "3.3.6" || "$PRODUCT_VERSION" == "3.4.2" ]]; then else cp /stackable/hadoop/share/hadoop/common/lib/hadoop-aws-${NEW_VERSION}.jar /stackable/hadoop-cloud-libraries/ # Starting with Hadoop 3.4.3 the aws-java-sdk-bundle is not included any more. - # TODO: Use Nexus, but I currently can't upload. - # curl -o /stackable/apache-hive-metastore-${NEW_VERSION}-bin/lib/aws-java-sdk-bundle-${AWS_JAVA_SDK_BUNDLE_VERSION}.jar --fail "https://repo.stackable.tech/repository/packages/aws/aws-java-sdk-bundle-${AWS_JAVA_SDK_BUNDLE_VERSION}.jar" - curl -o /stackable/hadoop-cloud-libraries/aws-java-sdk-bundle-${AWS_JAVA_SDK_BUNDLE_VERSION}.jar --fail "https://repo1.maven.org/maven2/software/amazon/awssdk/bundle/${AWS_JAVA_SDK_BUNDLE_VERSION}/bundle-${AWS_JAVA_SDK_BUNDLE_VERSION}.jar" + curl -o /stackable/hadoop-cloud-libraries/aws-java-sdk-bundle-${AWS_JAVA_SDK_BUNDLE_VERSION}.jar --fail "https://repo.stackable.tech/repository/packages/aws/aws-java-sdk-bundle-${AWS_JAVA_SDK_BUNDLE_VERSION}.jar" cp /stackable/hadoop/share/hadoop/common/lib/hadoop-azure-${NEW_VERSION}.jar /stackable/hadoop-cloud-libraries/ cp /stackable/hadoop/share/hadoop/common/lib/azure-storage-${AZURE_STORAGE_VERSION}.jar /stackable/hadoop-cloud-libraries/ cp /stackable/hadoop/share/hadoop/common/lib/azure-keyvault-core-${AZURE_KEYVAULT_CORE_VERSION}.jar /stackable/hadoop-cloud-libraries/ From eaf0db8a7199a2c70bc7b97357cb1601e2761f3a Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Fri, 29 May 2026 09:24:54 +0200 Subject: [PATCH 10/11] Remove parallel maven builds again --- hadoop/hadoop/Dockerfile | 1 
- hive/Dockerfile | 4 +--- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/hadoop/hadoop/Dockerfile b/hadoop/hadoop/Dockerfile index 5a4ccebbd..be1842bb8 100644 --- a/hadoop/hadoop/Dockerfile +++ b/hadoop/hadoop/Dockerfile @@ -93,7 +93,6 @@ tar -czf /stackable/hadoop-${NEW_VERSION}-src.tar.gz . # Therefore, this build does work but the final image does NOT contain the openssl-devel package which is why it fails there which is why we have to create the symlink over there manually. # We still leave this flag in to automatically fail should anything with the packages or symlinks ever fail. mvn \ - --threads 1C \ clean package install \ -Pdist,native \ -pl '!hadoop-tools/hadoop-pipes' \ diff --git a/hive/Dockerfile b/hive/Dockerfile index 0d9d8f974..0986c3b66 100644 --- a/hive/Dockerfile +++ b/hive/Dockerfile @@ -61,7 +61,6 @@ tar -czf /stackable/hive-${NEW_VERSION}-src.tar.gz . if [[ "${PRODUCT_VERSION}" == "3.1.3" ]] ; then mvn \ - --threads 1C \ clean package \ -DskipTests \ --projects standalone-metastore @@ -71,7 +70,6 @@ elif [[ "${PRODUCT_VERSION}" == 4.0.* ]]; then ( # https://issues.apache.org/jira/browse/HIVE-20451 switched the metastore server packaging starting with 4.0.0 mvn \ - --threads 1C \ clean package \ -DskipTests \ -Dhadoop.version=${HADOOP_VERSION}-stackable${RELEASE_VERSION} @@ -85,7 +83,7 @@ elif [[ "${PRODUCT_VERSION}" == 4.0.* ]]; then ) else # Starting with 4.1.0 the build process changed again in https://github.com/apache/hive/pull/5936 (HIVE-29062) - mvn --threads 1C \ + mvn \ clean package \ -Dhadoop.version=${HADOOP_VERSION}-stackable${RELEASE_VERSION} \ -DskipTests \ From a228ca73e60e49953c6281ae36ac254c6e9238a5 Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Fri, 29 May 2026 12:46:54 +0200 Subject: [PATCH 11/11] Fix another bundle name --- hadoop/hadoop/Dockerfile | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/hadoop/hadoop/Dockerfile b/hadoop/hadoop/Dockerfile index be1842bb8..fe6429eee 
100644 --- a/hadoop/hadoop/Dockerfile +++ b/hadoop/hadoop/Dockerfile @@ -154,7 +154,11 @@ mkdir -p /stackable/hadoop-cloud-libraries/ if [[ "$PRODUCT_VERSION" == "3.3.6" || "$PRODUCT_VERSION" == "3.4.2" ]]; then cp /stackable/hadoop/share/hadoop/tools/lib/hadoop-aws-${NEW_VERSION}.jar /stackable/hadoop-cloud-libraries/ # For some reason it was renamed from "aws-java-sdk-bundle" to "bundle" - cp /stackable/hadoop/share/hadoop/tools/lib/bundle-${AWS_JAVA_SDK_BUNDLE_VERSION}.jar /stackable/hadoop-cloud-libraries/aws-java-sdk-bundle-${AWS_JAVA_SDK_BUNDLE_VERSION}.jar + if [[ "$PRODUCT_VERSION" == "3.3.6" ]]; then + cp /stackable/hadoop/share/hadoop/tools/lib/aws-java-sdk-bundle-${AWS_JAVA_SDK_BUNDLE_VERSION}.jar /stackable/hadoop-cloud-libraries/aws-java-sdk-bundle-${AWS_JAVA_SDK_BUNDLE_VERSION}.jar + else + cp /stackable/hadoop/share/hadoop/tools/lib/bundle-${AWS_JAVA_SDK_BUNDLE_VERSION}.jar /stackable/hadoop-cloud-libraries/aws-java-sdk-bundle-${AWS_JAVA_SDK_BUNDLE_VERSION}.jar + fi cp /stackable/hadoop/share/hadoop/tools/lib/hadoop-azure-${NEW_VERSION}.jar /stackable/hadoop-cloud-libraries/ cp /stackable/hadoop/share/hadoop/tools/lib/azure-storage-${AZURE_STORAGE_VERSION}.jar /stackable/hadoop-cloud-libraries/ cp /stackable/hadoop/share/hadoop/tools/lib/azure-keyvault-core-${AZURE_KEYVAULT_CORE_VERSION}.jar /stackable/hadoop-cloud-libraries/