Hello!
I have an issue when adding a controller to a cluster from another controller. Here is what I did:
My entire infrastructure is running inside a Docker Compose setup, which allows me to test different types of infrastructures.
I currently have three controllers (I’m trying to add them to a cluster).
First, I created the PKI with a root CA and three intermediate CAs, as specified in the documentation:
#!/bin/bash
function wait_for_internet {
while ! echo > /dev/tcp/1.1.1.1/80 ; do
echo "Internet unavailable, retrying..."
sleep 1
done
echo "Internet is available ✅"
}
function install_openziti_binary {
wait_for_internet
curl -sS https://get.openziti.io/install.bash | bash -s openziti
if command -v ziti &> /dev/null; then
echo "OpenZiti binary installed successfully ✅"
else
echo "Failed to install OpenZiti binary ❌"
exit 1
fi
}
function create_root_ca {
# Create the trust root, a self-signed CA
ziti pki create ca \
--pki-root /pki --ca-file root --ca-name 'Cluster Root CA' \
--trust-domain ha.test
}
function create_controllers_certs {
# Create the controller 1 intermediate/signing cert
ziti pki create intermediate \
--pki-root /pki \
--ca-name root \
--intermediate-file ctrl1 \
--intermediate-name 'Controller One Signing Cert'
# Create the controller 1 server cert
ziti pki create server \
--pki-root /pki \
--ca-name ctrl1 \
--dns "localhost,ctrl1.ziti.example.com,controller1" \
--ip "127.0.0.1,::1,controller1" \
--server-name ctrl1 \
--spiffe-id 'controller/ctrl1'
# Create the controller 1 client cert
ziti pki create client \
--pki-root /pki \
--ca-name ctrl1 \
--client-name ctrl1 \
--spiffe-id 'controller/ctrl1'
# Create the controller 2 intermediate/signing cert
ziti pki create intermediate \
--pki-root /pki \
--ca-name root \
--intermediate-file ctrl2 \
--intermediate-name 'Controller Two Signing Cert'
# Create the controller 2 server cert
ziti pki create server \
--pki-root /pki \
--ca-name ctrl2 \
--dns "localhost,ctrl2.ziti.example.com,controller2" \
--ip "127.0.0.1,::1,controller2" \
--server-name ctrl2 \
--spiffe-id 'controller/ctrl2'
# Create the controller 2 client cert
ziti pki create client \
--pki-root /pki \
--ca-name ctrl2 \
--client-name ctrl2 \
--spiffe-id 'controller/ctrl2'
# Create the controller 3 intermediate/signing cert
ziti pki create intermediate \
--pki-root /pki \
--ca-name root \
--intermediate-file ctrl3 \
--intermediate-name 'Controller Three Signing Cert'
# Create the controller 3 server cert
ziti pki create server \
--pki-root /pki \
--ca-name ctrl3 \
--dns "localhost,ctrl3.ziti.example.com,controller3" \
--ip "127.0.0.1,::1,controller3" \
--server-name ctrl3 \
--spiffe-id 'controller/ctrl3'
# Create the controller 3 client cert
ziti pki create client \
--pki-root /pki \
--ca-name ctrl3 \
--client-name ctrl3 \
--spiffe-id 'controller/ctrl3'
}
function main {
wait_for_internet
apt update
apt install -y iproute2 jq tcpdump iptables curl iputils-ping wget iproute2 net-tools gnupg dnsutils
if ! install_openziti_binary ; then
echo "Installation of OpenZiti binary failed. Exiting."
exit 1
fi
rm -rf /pki/*
create_root_ca
create_controllers_certs
rm -rf /shared_pki/*
cp -Rv /pki /shared_pki
}
main "$@"
So it will create a shared volume with those certs:
root@controller1:/# tree /controller_certs/pki/
/controller_certs/pki/
|-- ctrl1
| |-- certs
| | |-- client.cert
| | |-- client.chain.pem
| | |-- ctrl1.cert
| | |-- ctrl1.chain.pem
| | |-- server.cert
| | `-- server.chain.pem
| |-- crlnumber
| |-- crls
| |-- index.txt
| |-- index.txt.attr
| |-- keys
| | |-- client.key
| | |-- ctrl1.key
| | `-- server.key
| `-- serial
|-- ctrl2
| |-- certs
| | |-- client.cert
| | |-- client.chain.pem
| | |-- ctrl2.cert
| | |-- ctrl2.chain.pem
| | |-- server.cert
| | `-- server.chain.pem
| |-- crlnumber
| |-- crls
| |-- index.txt
| |-- index.txt.attr
| |-- keys
| | |-- client.key
| | |-- ctrl2.key
| | `-- server.key
| `-- serial
|-- ctrl3
| |-- certs
| | |-- client.cert
| | |-- client.chain.pem
| | |-- ctrl3.cert
| | |-- ctrl3.chain.pem
| | |-- server.cert
| | `-- server.chain.pem
| |-- crlnumber
| |-- crls
| |-- index.txt
| |-- index.txt.attr
| |-- keys
| | |-- client.key
| | |-- ctrl3.key
| | `-- server.key
| `-- serial
`-- root
|-- certs
| |-- ctrl1.cert
| |-- ctrl2.cert
| |-- ctrl3.cert
| `-- root.cert
|-- crlnumber
|-- crls
|-- index.txt
|-- index.txt.attr
|-- keys
| |-- ctrl1.key
| |-- ctrl2.key
| |-- ctrl3.key
| `-- root.key
`-- serial
17 directories, 51 files
And on each controller container, I move those certificates into the controller’s actual root PKI directory (I download openziti-controller before doing it, so it will erase the create root pki with rootca and intermediate controller):
function load_files_for_ha {
while [ "$(find /controller_certs/pki/"$CONTROLLER_NAME" -mindepth 1 -maxdepth 1 2>/dev/null | wc -l)" -lt 6 ]; do
echo "Certificates for $CONTROLLER_NAME not found, retrying..."
sleep 1
done
echo "Certificates for $CONTROLLER_NAME found ✅"
rm -rf ./pki/*
mkdir -p ./pki/"$CONTROLLER_NAME"
mkdir -p ./pki/root
if ! cp -Rv /controller_certs/pki/"$CONTROLLER_NAME"/* ./pki/"$CONTROLLER_NAME"; then
echo "Failed to copy certificates for $CONTROLLER_NAME ❌"
exit 1
fi
if ! cp -Rv /controller_certs/pki/root/* ./pki/root/; then
echo "Failed to copy certificates for ROOTCA ❌"
exit 1
fi
#mv ./pki/intermediate/certs/"$CONTROLLER_NAME".cert ./pki/intermediate/certs/intermediate.cert
#mv ./pki/intermediate/certs/"$CONTROLLER_NAME".chain.pem ./pki/intermediate/certs/intermediate.chain.pem
#mv ./pki/intermediate/keys/"$CONTROLLER_NAME".key ./pki/intermediate/keys/intermediate.key
# Edit the controller config.yml
mkdir -p /var/lib/private/ziti-controller/cluster
# enable clustering
echo -e "cluster:\n dataDir: /var/lib/private/ziti-controller/cluster" >> config.yml
# Replacing paths to use the copied certs
sed -i 's|pki/root/certs/root.cert|pki/root/certs/root.cert|g' config.yml
sed -i 's|pki/intermediate/certs/client.chain.pem|pki/'"$CONTROLLER_NAME"'/certs/client.chain.pem|g' config.yml
sed -i 's|pki/intermediate/certs/server.chain.pem|pki/'"$CONTROLLER_NAME"'/certs/server.chain.pem|g' config.yml
sed -i 's|pki/intermediate/keys/server.key|pki/'"$CONTROLLER_NAME"'/keys/server.key|g' config.yml
sed -i 's|pki/intermediate/certs/intermediate.cert|pki/'"$CONTROLLER_NAME"'/certs/'"$CONTROLLER_NAME"'.cert|g' config.yml
sed -i 's|pki/intermediate/keys/intermediate.key|pki/'"$CONTROLLER_NAME"'/keys/'"$CONTROLLER_NAME"'.key|g' config.yml
}
While updating the paths to point to the new certificates:
[...]
db: "/var/lib/private/ziti-controller/bbolt.db"
identity:
cert: "pki/ctrl1/certs/client.chain.pem"
server_cert: "pki/ctrl1/certs/server.chain.pem"
key: "pki/ctrl1/keys/server.key"
ca: "pki/root/certs/root.cert"
#alt_server_certs:
# - server_cert: ""
# server_key: ""
[...]
web:
- name: client-management
bindPoints:
- interface: 0.0.0.0:6262
address: controller1:6262
identity:
ca: "pki/root/certs/root.cert"
key: "pki/ctrl1/keys/server.key"
server_cert: "pki/ctrl1/certs/server.chain.pem"
cert: "pki/ctrl1/certs/client.chain.pem"
options:
idleTimeout: 5000ms
readTimeout: 5000ms
writeTimeout: 100000ms
minTLSVersion: TLS1.2
maxTLSVersion: TLS1.3
apis:
- binding: edge-management
options: { }
- binding: edge-client
options: { }
- binding: fabric
options: { }
- binding: edge-oidc
options: { }
- binding: zac
options:
location: /opt/openziti/share/console
indexFile: index.html
cluster:
dataDir: /var/lib/private/ziti-controller/cluster
After loading the certificates and editing the configuration file, I tried to figure out why this error occurred:
root@controller1:/# ziti agent cluster add tls:controller2:6262
cluster add failed: unable to dial tls:controller2:6262: remote error: tls: error decrypting message
Logs of controller1:
{"_context":"tls:controller2:6262","error":"remote error: tls: error decrypting message","file":"github.com/openziti/channel/v4@v4.2.35/classic_dialer.go:96","func":"github.com/openziti/channel/v4.(*classicDialer).CreateWithHeaders","level":"warning","msg":"error initiating channel with hello","time":"2025-11-21T15:02:23.359Z"}
{"file":"github.com/openziti/channel/v4@v4.2.35/message.go:732","func":"github.com/openziti/channel/v4.getRetryVersionFor","level":"info","msg":"defaulting to version 2","time":"2025-11-21T15:02:23.359Z"}
{"_context":"tls:controller2:6262","file":"github.com/openziti/channel/v4@v4.2.35/classic_dialer.go:100","func":"github.com/openziti/channel/v4.(*classicDialer).CreateWithHeaders","level":"warning","msg":"Retrying dial with protocol version 2","time":"2025-11-21T15:02:23.359Z"}
{"_context":"ch{agent}-\u003eu{existing}-\u003ei{ABMD}","file":"github.com/openziti/ziti/common/handler_common/common.go:34","func":"github.com/openziti/ziti/common/handler_common.SendOpResult","level":"error","msg":"agent error performing cluster.add-peer: (unable to dial tls:controller2:6262: remote error: tls: error decrypting message)","operation":"cluster.add-peer","time":"2025-11-21T15:02:23.387Z"}
Logs of controller2:
{"_context":"tls:0.0.0.0:6262","error":"tls: invalid signature by the client certificate: crypto/rsa: verification error","file":"github.com/openziti/transport/v2@v2.0.193/tls/listener.go:260","func":"github.com/openziti/transport/v2/tls.(*sharedListener).processConn","level":"error","msg":"handshake failed","remote":"172.30.2.5:41816","time":"2025-11-21T15:02:23.359Z"}
{"_context":"tls:0.0.0.0:6262","error":"tls: invalid signature by the client certificate: crypto/rsa: verification error","file":"github.com/openziti/transport/v2@v2.0.193/tls/listener.go:260","func":"github.com/openziti/transport/v2/tls.(*sharedListener).processConn","level":"error","msg":"handshake failed","remote":"172.30.2.5:41820","time":"2025-11-21T15:02:23.387Z"}
And to help with debugging, here is the complete tree of the shared volume (middle) and the certificates automatically loaded in each controller’s root PKI (left: controller1, right: controller2):
Do you have an idea of what could be the issue? I know the HA Controller is still in Beta mode and I didn’t saw any error like that on the discourse…
Thanks
