I’m helplessly trying to set up a 3-node Keycloak cluster running on Docker Swarm and nothing seems to work. The Infinispan cache cluster is not forming properly, and I’ve tried both DNS_PING and JDBC_PING.
With DNS_PING in Docker Swarm, each container creates its own Infinispan cluster. With JDBC_PING the behavior is so erratic that I’m totally flabbergasted. On one occasion the cluster formed, but only between two nodes, while the third one randomly dropped out of it with the following log:
2022-08-10 10:06:51,262 WARN [org.jgroups.protocols.pbcast.GMS] (VERIFY_SUSPECT.TimerThread-27,3d86b1839045-28784) 3d86b1839045-28784: I'm not the coordinator (or next-in-line); dropping LEAVE request
On another occasion, the cluster creation times out and each container creates its own cache:
2022-08-10 10:12:09,459 WARN [org.jgroups.protocols.pbcast.GMS] (keycloak-cache-init) ba3476a27d44-13476: JOIN(ba3476a27d44-13476) sent to c6e7dc56f483-24504 timed out (after 2000 ms), on try 9
2022-08-10 10:12:09,460 WARN [org.jgroups.protocols.pbcast.GMS] (keycloak-cache-init) ba3476a27d44-13476: too many JOIN attempts (10): becoming singleton
I delete the jgroupsping table from the DB each time I restart the stack, so this behavior is just incomprehensible to me. It is in no way sustainable, and I’m really struggling to find anything online that would help me resolve all these issues. I’ve got the JDBC_PING method to work on a Kubernetes cluster, but not on Docker Swarm.
Could anyone share some tips?
Here is my docker stack file, I used this repository as a base:
# Docker Swarm stack: three single-replica Keycloak services (keycloak-1..3)
# that share one PostgreSQL database and one Infinispan cache configuration.
# The three service definitions are intentionally identical except for
# JGROUPS_DISCOVERY_EXTERNAL_IP, which carries the service's own name.
version: "3.3"

services:
  keycloak-1:
    image: keycloak-17.0.1-test:latest
    command: start
    networks:
      - keycloak
    environment:
      # NOTE(review): plain-text test credentials; move them to swarm secrets
      # before using this stack outside a test environment.
      KEYCLOAK_ADMIN: admin
      KEYCLOAK_ADMIN_PASSWORD: keycloak
      KC_DB: postgres
      KC_DB_URL_HOST: postgres
      KC_DB_URL_DATABASE: keycloak
      KC_DB_USERNAME: keycloak
      KC_DB_PASSWORD: keycloak
      # Matches the "public." schema hard-coded in the JDBC_PING SQL statements.
      KC_DB_SCHEMA: public
      KC_HTTPS_KEY_STORE_FILE: /opt/keycloak/certs/keystore.jks
      KC_HTTPS_KEY_STORE_PASSWORD: test
      KC_CACHE: ispn
      # DEBUG on the clustering packages so join/leave problems are visible.
      KC_LOG_LEVEL: 'INFO,org.infinispan:DEBUG,org.jgroups:DEBUG'
      KC_HOSTNAME: localhost
      KC_CACHE_CONFIG_FILE: cache-ispn-jdbc-ping.xml
      KC_HTTP_RELATIVE_PATH: /auth
      # Read by cache-ispn-jdbc-ping.xml as the JGroups TCP external_addr and
      # as the bind_addr value written to the JGROUPSPING table.
      # NOTE(review): under the default endpoint_mode (vip) a service name
      # presumably resolves to the swarm virtual IP rather than the container
      # address; consider `endpoint_mode: dnsrr` under `deploy` - confirm.
      JGROUPS_DISCOVERY_EXTERNAL_IP: keycloak-1
    secrets:
      - source: keystore.jks
        target: /opt/keycloak/certs/keystore.jks
        # Read-only is sufficient for a keystore. Left unquoted on purpose:
        # the Compose schema expects an integer in octal notation here.
        mode: 0444
    configs:
      - source: ispn-cache-config
        target: /opt/keycloak/conf/cache-ispn-jdbc-ping.xml
        mode: 0644
    deploy:
      mode: replicated
      replicas: 1

  keycloak-2:
    image: keycloak-17.0.1-test:latest
    command: start
    networks:
      - keycloak
    environment:
      # NOTE(review): plain-text test credentials; see keycloak-1.
      KEYCLOAK_ADMIN: admin
      KEYCLOAK_ADMIN_PASSWORD: keycloak
      KC_DB: postgres
      KC_DB_URL_HOST: postgres
      KC_DB_URL_DATABASE: keycloak
      KC_DB_USERNAME: keycloak
      KC_DB_PASSWORD: keycloak
      KC_DB_SCHEMA: public
      KC_HTTPS_KEY_STORE_FILE: /opt/keycloak/certs/keystore.jks
      KC_HTTPS_KEY_STORE_PASSWORD: test
      KC_CACHE: ispn
      KC_LOG_LEVEL: 'INFO,org.infinispan:DEBUG,org.jgroups:DEBUG'
      KC_HOSTNAME: localhost
      KC_CACHE_CONFIG_FILE: cache-ispn-jdbc-ping.xml
      KC_HTTP_RELATIVE_PATH: /auth
      JGROUPS_DISCOVERY_EXTERNAL_IP: keycloak-2
    secrets:
      - source: keystore.jks
        target: /opt/keycloak/certs/keystore.jks
        mode: 0444
    configs:
      - source: ispn-cache-config
        target: /opt/keycloak/conf/cache-ispn-jdbc-ping.xml
        mode: 0644
    deploy:
      mode: replicated
      replicas: 1

  keycloak-3:
    image: keycloak-17.0.1-test:latest
    command: start
    networks:
      - keycloak
    environment:
      # NOTE(review): plain-text test credentials; see keycloak-1.
      KEYCLOAK_ADMIN: admin
      KEYCLOAK_ADMIN_PASSWORD: keycloak
      KC_DB: postgres
      KC_DB_URL_HOST: postgres
      KC_DB_URL_DATABASE: keycloak
      KC_DB_USERNAME: keycloak
      KC_DB_PASSWORD: keycloak
      KC_DB_SCHEMA: public
      KC_HTTPS_KEY_STORE_FILE: /opt/keycloak/certs/keystore.jks
      KC_HTTPS_KEY_STORE_PASSWORD: test
      KC_CACHE: ispn
      # Same verbosity as the other nodes so this node's JGroups activity can
      # be compared with theirs when it drops out of the cluster.
      KC_LOG_LEVEL: 'INFO,org.infinispan:DEBUG,org.jgroups:DEBUG'
      KC_HOSTNAME: localhost
      KC_CACHE_CONFIG_FILE: cache-ispn-jdbc-ping.xml
      KC_HTTP_RELATIVE_PATH: /auth
      JGROUPS_DISCOVERY_EXTERNAL_IP: keycloak-3
    secrets:
      - source: keystore.jks
        target: /opt/keycloak/certs/keystore.jks
        mode: 0444
    configs:
      - source: ispn-cache-config
        target: /opt/keycloak/conf/cache-ispn-jdbc-ping.xml
        mode: 0644
    deploy:
      mode: replicated
      replicas: 1

secrets:
  # Created outside this stack (e.g. with `docker secret create`).
  keystore.jks:
    external: true

configs:
  ispn-cache-config:
    file: ./cache-ispn-jdbc-ping.xml

networks:
  # Pre-existing network shared by the three services.
  # NOTE(review): the `postgres` host is not defined in this file; presumably
  # it is reachable on this network - confirm.
  keycloak:
    external: true
And my cache-ispn-jdbc-ping.xml
file:
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Infinispan cache configuration for Keycloak.
  Defines a JGroups stack that extends the built-in "tcp" stack and swaps its
  MPING discovery for JDBC_PING against the Keycloak PostgreSQL database, then
  declares the "keycloak" cache container that uses that stack as transport.
-->
<infinispan
    xmlns="urn:infinispan:config:11.0"
    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    xsi:schemaLocation="urn:infinispan:config:11.0 http://www.infinispan.org/schemas/infinispan-config-11.0.xsd">

  <jgroups>
    <stack name="postgres-jdbc-ping-tcp" extends="tcp">
      <!-- Address advertised to other members; falls back to 127.0.0.1 when
           JGROUPS_DISCOVERY_EXTERNAL_IP is not set. -->
      <TCP external_addr="${env.JGROUPS_DISCOVERY_EXTERNAL_IP:127.0.0.1}" />

      <!-- Takes the place of MPING in the inherited stack (see stack.combine
           and stack.position below). Connection settings come from the same
           KC_DB_* environment variables Keycloak itself uses; all SQL targets
           the public.JGROUPSPING table. -->
      <JDBC_PING
          stack.combine="REPLACE"
          stack.position="MPING"
          connection_driver="org.postgresql.Driver"
          connection_url="jdbc:postgresql://${env.KC_DB_URL_HOST}/${env.KC_DB_URL_DATABASE}"
          connection_username="${env.KC_DB_USERNAME}"
          connection_password="${env.KC_DB_PASSWORD}"
          initialize_sql="CREATE TABLE IF NOT EXISTS public.JGROUPSPING (own_addr varchar(200) NOT NULL, cluster_name varchar(200) NOT NULL, bind_addr varchar(200) NOT NULL, updated timestamp default current_timestamp, ping_data BYTEA, constraint PK_JGROUPSPING PRIMARY KEY (own_addr, cluster_name));"
          insert_single_sql="INSERT INTO public.JGROUPSPING (own_addr, cluster_name, bind_addr, updated, ping_data) values (?, ?, '${env.JGROUPS_DISCOVERY_EXTERNAL_IP:127.0.0.1}', NOW(), ?);"
          delete_single_sql="DELETE FROM public.JGROUPSPING WHERE own_addr=? AND cluster_name=?;"
          select_all_pingdata_sql="SELECT ping_data, own_addr, cluster_name FROM public.JGROUPSPING WHERE cluster_name=?"
          info_writer_sleep_time="500"
          remove_all_data_on_view_change="true" />
    </stack>
  </jgroups>

  <cache-container name="keycloak">
    <transport lock-timeout="60000" stack="postgres-jdbc-ping-tcp"/>

    <!-- Node-local caches, each capped by max-count. -->
    <local-cache name="realms">
      <encoding>
        <key media-type="application/x-java-object"/>
        <value media-type="application/x-java-object"/>
      </encoding>
      <memory max-count="10000"/>
    </local-cache>

    <local-cache name="users">
      <encoding>
        <key media-type="application/x-java-object"/>
        <value media-type="application/x-java-object"/>
      </encoding>
      <memory max-count="10000"/>
    </local-cache>

    <!-- Distributed caches; owners="3" on every one of them and
         lifespan="-1" on each. -->
    <distributed-cache name="sessions" owners="3">
      <expiration lifespan="-1"/>
    </distributed-cache>

    <distributed-cache name="authenticationSessions" owners="3">
      <expiration lifespan="-1"/>
    </distributed-cache>

    <distributed-cache name="offlineSessions" owners="3">
      <expiration lifespan="-1"/>
    </distributed-cache>

    <distributed-cache name="clientSessions" owners="3">
      <expiration lifespan="-1"/>
    </distributed-cache>

    <distributed-cache name="offlineClientSessions" owners="3">
      <expiration lifespan="-1"/>
    </distributed-cache>

    <distributed-cache name="loginFailures" owners="3">
      <expiration lifespan="-1"/>
    </distributed-cache>

    <local-cache name="authorization">
      <encoding>
        <key media-type="application/x-java-object"/>
        <value media-type="application/x-java-object"/>
      </encoding>
      <memory max-count="10000"/>
    </local-cache>

    <!-- The only replicated cache in this container. -->
    <replicated-cache name="work">
      <expiration lifespan="-1"/>
    </replicated-cache>

    <!-- Local cache with a max-idle of 3600000 and at most 1000 entries. -->
    <local-cache name="keys">
      <encoding>
        <key media-type="application/x-java-object"/>
        <value media-type="application/x-java-object"/>
      </encoding>
      <expiration max-idle="3600000"/>
      <memory max-count="1000"/>
    </local-cache>

    <distributed-cache name="actionTokens" owners="3">
      <encoding>
        <key media-type="application/x-java-object"/>
        <value media-type="application/x-java-object"/>
      </encoding>
      <expiration max-idle="-1" lifespan="-1" interval="300000"/>
      <memory max-count="-1"/>
    </distributed-cache>
  </cache-container>
</infinispan>
With this config, only two nodes join the infinispan cluster, the third one creates its own. As you can see, the owner count in the ispn config file is set to 3 and it still doesn’t work properly.