A node going down in the cluster destabilizes the system for 5 minutes

Hello,

We are running a 3-machine production cluster of Keycloak 10.0.2 on AWS.
One machine suddenly stopped responding, probably due to AWS datacenter problems.
This caused major instability: the other 2 Keycloak nodes kept trying to sync the distributed cache to the unreachable machine for 5 minutes, and during that time all requests failed with HTTP 500. After that, the two remaining nodes seemingly stopped trying to reach the unreachable machine and everything was OK.

Is there a configuration that we can apply so that they don’t keep trying for so long? 5 minutes is a bit too much.

We are using JDBC_PING for discovery. This is the CLI script we use to configure the caches and the JGroups stack:

# Offline JBoss CLI script: starts an embedded server on standalone-ha.xml and
# queues every operation between `batch` and `run-batch` to be applied together.
# The {{ keycloak.advertised_addr }} placeholders are template variables and must
# be rendered to a concrete address before this script is run.
embed-server --server-config=standalone-ha.xml --std-out=echo
batch

# Replace the distributed "sessions" cache with a replicated cache in SYNC mode.
/subsystem=infinispan/cache-container=keycloak/distributed-cache=sessions:remove
/subsystem=infinispan/cache-container=keycloak/replicated-cache=sessions:add()
/subsystem=infinispan/cache-container=keycloak/replicated-cache=sessions:write-attribute(name="mode",value="SYNC")

# Drop the remaining distributed caches so they can be re-created below
# with explicit mode/owners settings.
/subsystem=infinispan/cache-container=keycloak/distributed-cache=authenticationSessions:remove
/subsystem=infinispan/cache-container=keycloak/distributed-cache=offlineSessions:remove
/subsystem=infinispan/cache-container=keycloak/distributed-cache=clientSessions:remove
/subsystem=infinispan/cache-container=keycloak/distributed-cache=offlineClientSessions:remove
/subsystem=infinispan/cache-container=keycloak/distributed-cache=loginFailures:remove

# Re-create them in SYNC mode with owners=3.
# NOTE(review): presumably owners=3 was chosen to match the 3-node cluster so that
# every node holds every entry; with SYNC mode this may make writes wait on an
# unreachable owner until it is removed from the cluster view - confirm.
/subsystem=infinispan/cache-container=keycloak/distributed-cache=authenticationSessions:add(mode="SYNC",owners="3")
/subsystem=infinispan/cache-container=keycloak/distributed-cache=offlineSessions:add(mode="SYNC",owners="3")
/subsystem=infinispan/cache-container=keycloak/distributed-cache=clientSessions:add(mode="SYNC",owners="3")
/subsystem=infinispan/cache-container=keycloak/distributed-cache=offlineClientSessions:add(mode="SYNC",owners="3")
/subsystem=infinispan/cache-container=keycloak/distributed-cache=loginFailures:add(mode="SYNC",owners="3")

# Workaround for the Infinispan work-cache issue tracked at https://issues.redhat.com/browse/JDG-987:
# re-create the replicated "work" cache with an expiration lifespan of 60000
# (presumably milliseconds - confirm against the Infinispan subsystem model).
/subsystem=infinispan/cache-container=keycloak/replicated-cache=work:remove
/subsystem=infinispan/cache-container=keycloak/replicated-cache=work:add()
/subsystem=infinispan/cache-container=keycloak/replicated-cache=work/component=expiration:add()
/subsystem=infinispan/cache-container=keycloak/replicated-cache=work/component=expiration/:write-attribute(name=lifespan, value=60000)

# Rebuild the JGroups "tcp" stack from scratch. The TCP transport uses the
# jgroups-tcp socket binding; external_addr and bind_addr are both set to the
# templated advertised address.
# NOTE(review): no socket connect/read timeout properties are set on the transport,
# so whatever defaults the transport has apply - worth checking for the slow-failover issue.
/subsystem=jgroups/stack=tcp:remove()
/subsystem=jgroups/stack=tcp:add()
/subsystem=jgroups/stack=tcp/transport=TCP:add(socket-binding="jgroups-tcp")
/subsystem=jgroups/stack=tcp/transport=TCP/property=external_addr/:add(value={{ keycloak.advertised_addr }})
/subsystem=jgroups/stack=tcp/transport=TCP/property=bind_addr:add(value={{ keycloak.advertised_addr }})

# Discovery via JDBC_PING through the KeycloakDS datasource. initialize_sql creates
# the JGROUPSPING table when it does not exist yet (the bytea column type suggests
# PostgreSQL - confirm). info_writer_sleep_time is set to 500.
/subsystem=jgroups/stack=tcp/protocol=JDBC_PING:add()
/subsystem=jgroups/stack=tcp/protocol=JDBC_PING/property=datasource_jndi_name:add(value=java:jboss/datasources/KeycloakDS)
/subsystem=jgroups/stack=tcp/protocol=JDBC_PING/property=initialize_sql:add(value="CREATE TABLE IF NOT EXISTS JGROUPSPING (own_addr varchar(200) NOT NULL, cluster_name varchar(200) NOT NULL, ping_data bytea DEFAULT NULL, PRIMARY KEY (own_addr, cluster_name))")
/subsystem=jgroups/stack=tcp/protocol=JDBC_PING/property=info_writer_sleep_time:add(value=500)

# Remaining protocols of the stack, added in stack order.
# NOTE(review): FD_SOCK, FD and VERIFY_SUSPECT are added without any timeout or
# interval properties (only FD_SOCK's external_addr is set), so how quickly a dead
# node is suspected and excluded depends entirely on protocol defaults. These are
# the first candidates to tune for the 5-minute outage - verify the defaults for
# the JGroups version shipped with this Keycloak release.
/subsystem=jgroups/stack=tcp/protocol=MERGE3:add()
/subsystem=jgroups/stack=tcp/protocol=FD_SOCK:add(socket-binding="jgroups-tcp-fd")
/subsystem=jgroups/stack=tcp/protocol=FD_SOCK/property=external_addr/:add(value={{ keycloak.advertised_addr }})
/subsystem=jgroups/stack=tcp/protocol=FD:add()
/subsystem=jgroups/stack=tcp/protocol=VERIFY_SUSPECT:add()
/subsystem=jgroups/stack=tcp/protocol=pbcast.NAKACK2:add()
/subsystem=jgroups/stack=tcp/protocol=UNICAST3:add()
/subsystem=jgroups/stack=tcp/protocol=pbcast.STABLE:add()
/subsystem=jgroups/stack=tcp/protocol=pbcast.GMS:add()
/subsystem=jgroups/stack=tcp/protocol=pbcast.GMS/property=max_join_attempts:add(value=10)
/subsystem=jgroups/stack=tcp/protocol=pbcast.GMS/property=join_timeout:add(value=5000)
/subsystem=jgroups/stack=tcp/protocol=MFC:add()
/subsystem=jgroups/stack=tcp/protocol=FRAG3:add()

# Remove the udp stack, point the "ee" channel at the tcp stack, and remove the
# jgroups-mping socket binding.
/subsystem=jgroups/stack=udp:remove()
/subsystem=jgroups/channel=ee:write-attribute(name=stack, value=tcp)
/socket-binding-group=standard-sockets/socket-binding=jgroups-mping:remove()

# Logging: add an org.jgroups logger and set both it and the CONSOLE handler to
# INFO (not DEBUG).
/subsystem=logging/logger=org.jgroups:add
/subsystem=logging/logger=org.jgroups:write-attribute(name=level,value=INFO)
/subsystem=logging/console-handler=CONSOLE/:write-attribute(name=level,value=INFO)

# Execute the queued batch, then shut the embedded server down.
run-batch
stop-embedded-server