Our application connects to the Hazelcast cluster as a client through a separate Hazelcast connection. We experienced outages of some of the Hazelcast nodes, which resulted in no leader being elected.
I've investigated the LeaderInitiator and found a couple of issues with the current implementation.
after the client node became leader. This can be problematic if the cluster loses any information about the lock or if someone force-unlocks the lock. In that case the lock is likely to be granted to another node, resulting in two leaders. It might be better to regularly check whether the lock is still held by the node and, if it is not, fire the onRevoked event and try to acquire the lock again.
ClientInvocation{clientMessageType=1800, target=partition 258, sendConnection=ClientConnection{alive=false, connectionId=3, socketChannel=DefaultSocketChannelWrapper{socketChannel=java.nio.channels.SocketChannel[closed]}, remoteEndpoint=[10.100.72.10]:5701, lastReadTime=2018-01-22 14:50:08.804, lastWriteTime=2018-01-22 14:50:08.811, closedTime=2018-01-22 14:50:08.804, lastHeartbeatRequested=2018-01-22 14:06:36.623, lastHeartbeatReceived=2018-01-22 14:06:36.625, connected server version=3.8.6}} timed out by 2486800 ms",
"message": "ClientInvocation{clientMessageType=1800, target=partition 258, sendConnection=ClientConnection{alive=false, connectionId=3, socketChannel=DefaultSocketChannelWrapper{socketChannel=java.nio.channels.SocketChannel[closed]}, remoteEndpoint=[10.100.72.10]:5701, lastReadTime=2018-01-22 14:50:08.804, lastWriteTime=2018-01-22 14:50:08.811, closedTime=2018-01-22 14:50:08.804, lastHeartbeatRequested=2018-01-22 14:06:36.623, lastHeartbeatReceived=2018-01-22 14:06:36.625, connected server version=3.8.6}} timed out by 2486800 ms",
"name": "com.hazelcast.core.OperationTimeoutException",
"cause": {
"commonElementCount": 0,
"localizedMessage": "Connection closed by the other side",
"message": "Connection closed by the other side",
"name": "com.hazelcast.spi.exception.TargetDisconnectedException",
"cause": {
"commonElementCount": 0,
"localizedMessage": "Remote socket closed!",
"message": "Remote socket closed!",
"name": "java.io.EOFException",
"extendedStackTrace": [
{
"class": "com.hazelcast.internal.networking.nonblocking.NonBlockingSocketReader",
"method": "handle",
"file": "NonBlockingSocketReader.java",
"line": 153,
"exact": false,
"location": "hazelcast-3.8.6.jar!/",
"version": "3.8.6"
},
{
"class": "com.hazelcast.internal.networking.nonblocking.NonBlockingIOThread",
"method": "handleSelectionKey",
"file": "NonBlockingIOThread.java",
"line": 349,
"exact": false,
"location": "hazelcast-3.8.6.jar!/",
"version": "3.8.6"
},
{
"class": "com.hazelcast.internal.networking.nonblocking.NonBlockingIOThread",
"method": "handleSelectionKeys",
"file": "NonBlockingIOThread.java",
"line": 334,
"exact": false,
"location": "hazelcast-3.8.6.jar!/",
"version": "3.8.6"
},
{
"class": "com.hazelcast.internal.networking.nonblocking.NonBlockingIOThread",
"method": "selectLoop",
"file": "NonBlockingIOThread.java",
"line": 252,
"exact": false,
"location": "hazelcast-3.8.6.jar!/",
"version": "3.8.6"
},
{
"class": "com.hazelcast.internal.networking.nonblocking.NonBlockingIOThread",
"method": "run",
"file": "NonBlockingIOThread.java",
"line": 205,
"exact": false,
"location": "hazelcast-3.8.6.jar!/",
"version": "3.8.6"
}
]
},
"extendedStackTrace": [
{
"class": "com.hazelcast.client.spi.impl.ClientInvocationServiceSupport$CleanResourcesTask",
"method": "notifyException",
"file": "ClientInvocationServiceSupport.java",
"line": 229,
"exact": false,
"location": "hazelcast-client-3.8.6.jar!/",
"version": "3.8.6"
},
{
"class": "com.hazelcast.client.spi.impl.ClientInvocationServiceSupport$CleanResourcesTask",
"method": "run",
"file": "ClientInvocationServiceSupport.java",
"line": 214,
"exact": false,
"location": "hazelcast-client-3.8.6.jar!/",
"version": "3.8.6"
},
{
"class": "java.util.concurrent.Executors$RunnableAdapter",
"method": "call",
"file": "Executors.java",
"line": 511,
"exact": false,
"location": "?",
"version": "1.8.0_112"
},
{
"class": "java.util.concurrent.FutureTask",
"method": "runAndReset",
"file": "FutureTask.java",
"line": 308,
"exact": false,
"location": "?",
"version": "1.8.0_112"
},
{
"class": "java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask",
"method": "access$301",
"file": "ScheduledThreadPoolExecutor.java",
"line": 180,
"exact": false,
"location": "?",
"version": "1.8.0_112"
},
{
"class": "java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask",
"method": "run",
"file": "ScheduledThreadPoolExecutor.java",
"line": 294,
"exact": false,
"location": "?",
"version": "1.8.0_112"
},
{
"class": "com.hazelcast.util.executor.LoggingScheduledExecutor$LoggingDelegatingFuture",
"method": "run",
"file": "LoggingScheduledExecutor.java",
"line": 140,
"exact": false,
"location": "hazelcast-3.8.6.jar!/",
"version": "3.8.6"
},
{
"class": "java.util.concurrent.ThreadPoolExecutor",
"method": "runWorker",
"file": "ThreadPoolExecutor.java",
"line": 1142,
"exact": false,
"location": "?",
"version": "1.8.0_112"
},
{
"class": "java.util.concurrent.ThreadPoolExecutor$Worker",
"method": "run",
"file": "ThreadPoolExecutor.java",
"line": 617,
"exact": false,
"location": "?",
"version": "1.8.0_112"
},
{
"class": "java.lang.Thread",
"method": "run",
"file": "Thread.java",
"line": 745,
"exact": false,
"location": "?",
"version": "1.8.0_112"
},
{
"class": "com.hazelcast.util.executor.HazelcastManagedThread",
"method": "executeRun",
"file": "HazelcastManagedThread.java",
"line": 64,
"exact": false,
"location": "hazelcast-3.8.6.jar!/",
"version": "3.8.6"
},
{
"class": "com.hazelcast.util.executor.HazelcastManagedThread",
"method": "run",
"file": "HazelcastManagedThread.java",
"line": 80,
"exact": false,
"location": "hazelcast-3.8.6.jar!/",
"version": "3.8.6"
}
]
},
"extendedStackTrace": [
{
"class": "com.hazelcast.client.spi.impl.ClientInvocation",
"method": "notifyException",
"file": "ClientInvocation.java",
"line": 203,
"exact": false,
"location": "hazelcast-client-3.8.6.jar!/",
"version": "3.8.6"
},
{
"class": "com.hazelcast.client.spi.impl.ClientInvocationServiceSupport$CleanResourcesTask",
"method": "notifyException",
"file": "ClientInvocationServiceSupport.java",
"line": 234,
"exact": false,
"location": "hazelcast-client-3.8.6.jar!/",
"version": "3.8.6"
},
{
"class": "com.hazelcast.client.spi.impl.ClientInvocationServiceSupport$CleanResourcesTask",
"method": "run",
"file": "ClientInvocationServiceSupport.java",
"line": 214,
"exact": false,
"location": "hazelcast-client-3.8.6.jar!/",
"version": "3.8.6"
},
{
"class": "java.util.concurrent.Executors$RunnableAdapter",
"method": "call",
"file": "Executors.java",
"line": 511,
"exact": false,
"location": "?",
"version": "1.8.0_112"
},
{
"class": "java.util.concurrent.FutureTask",
"method": "runAndReset",
"file": "FutureTask.java",
"line": 308,
"exact": false,
"location": "?",
"version": "1.8.0_112"
},
{
"class": "java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask",
"method": "access$301",
"file": "ScheduledThreadPoolExecutor.java",
"line": 180,
"exact": false,
"location": "?",
"version": "1.8.0_112"
},
{
"class": "java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask",
"method": "run",
"file": "ScheduledThreadPoolExecutor.java",
"line": 294,
"exact": false,
"location": "?",
"version": "1.8.0_112"
},
{
"class": "com.hazelcast.util.executor.LoggingScheduledExecutor$LoggingDelegatingFuture",
"method": "run",
"file": "LoggingScheduledExecutor.java",
"line": 140,
"exact": false,
"location": "hazelcast-3.8.6.jar!/",
"version": "3.8.6"
},
{
"class": "java.util.concurrent.ThreadPoolExecutor",
"method": "runWorker",
"file": "ThreadPoolExecutor.java",
"line": 1142,
"exact": false,
"location": "?",
"version": "1.8.0_112"
},
{
"class": "java.util.concurrent.ThreadPoolExecutor$Worker",
"method": "run",
"file": "ThreadPoolExecutor.java",
"line": 617,
"exact": false,
"location": "?",
"version": "1.8.0_112"
},
{
"class": "java.lang.Thread",
"method": "run",
"file": "Thread.java",
"line": 745,
"exact": false,
"location": "?",
"version": "1.8.0_112"
},
{
"class": "com.hazelcast.util.executor.HazelcastManagedThread",
"method": "executeRun",
"file": "HazelcastManagedThread.java",
"line": 64,
"exact": false,
"location": "hazelcast-3.8.6.jar!/",
"version": "3.8.6"
},
{
"class": "com.hazelcast.util.executor.HazelcastManagedThread",
"method": "run",
"file": "HazelcastManagedThread.java",
"line": 80,
"exact": false,
"location": "hazelcast-3.8.6.jar!/",
"version": "3.8.6"
},
{
"class": "------ submitted from ------",
"line": -1,
"exact": false,
"location": "?",
"version": "?"
},
{
"class": "com.hazelcast.client.spi.impl.ClientInvocationFuture",
"method": "resolveAndThrowIfException",
"file": "ClientInvocationFuture.java",
"line": 95,
"exact": false,
"location": "hazelcast-client-3.8.6.jar!/",
"version": "3.8.6"
},
{
"class": "com.hazelcast.client.spi.impl.ClientInvocationFuture",
"method": "resolveAndThrowIfException",
"file": "ClientInvocationFuture.java",
"line": 32,
"exact": false,
"location": "hazelcast-client-3.8.6.jar!/",
"version": "3.8.6"
},
{
"class": "com.hazelcast.spi.impl.AbstractInvocationFuture",
"method": "get",
"file": "AbstractInvocationFuture.java",
"line": 155,
"exact": false,
"location": "hazelcast-3.8.6.jar!/",
"version": "3.8.6"
},
{
"class": "com.hazelcast.client.spi.ClientProxy",
"method": "invokeOnPartition",
"file": "ClientProxy.java",
"line": 170,
"exact": false,
"location": "hazelcast-client-3.8.6.jar!/",
"version": "3.8.6"
},
{
"class": "com.hazelcast.client.proxy.PartitionSpecificClientProxy",
"method": "invokeOnPartition",
"file": "PartitionSpecificClientProxy.java",
"line": 47,
"exact": false,
"location": "hazelcast-client-3.8.6.jar!/",
"version": "3.8.6"
},
{
"class": "com.hazelcast.client.proxy.ClientLockProxy",
"method": "tryLock",
"file": "ClientLockProxy.java",
"line": 145,
"exact": false,
"location": "hazelcast-client-3.8.6.jar!/",
"version": "3.8.6"
},
{
"class": "com.hazelcast.client.proxy.ClientLockProxy",
"method": "tryLock",
"file": "ClientLockProxy.java",
"line": 135,
"exact": false,
"location": "hazelcast-client-3.8.6.jar!/",
"version": "3.8.6"
},
{
"class": "customized.LeaderInitiator$LeaderSelector",
"method": "call",
"file": "LeaderInitiator.java",
"line": 251,
"exact": true,
"version": "?"
}