HBase 全部RS节点宕机,请帮助分析
2018-01-20T03:26:18 Region server slave6.bigdata.test.cn,16020,1507703889028 reported a fatal error:
ABORTING region server slave6.bigdata.test.cn,16020,1507703889028: IOE in log roller
Cause:
java.io.IOException: cannot get log writer
at org.apache.hadoop.hbase.wal.DefaultWALProvider.createWriter(DefaultWALProvider.java:374)
at org.apache.hadoop.hbase.regionserver.wal.FSHLog.createWriterInstance(FSHLog.java:781)
at org.apache.hadoop.hbase.regionserver.wal.FSHLog.rollWriter(FSHLog.java:746)
at org.apache.hadoop.hbase.regionserver.LogRoller.run(LogRoller.java:148)
at java.lang.Thread.run(Thread.java:748)
Caused by: java.io.FileNotFoundException: Parent directory doesn't exist: /hbase/WALs/slave6.bigdata.test.cn,16020,1507703889028
at org.apache.hadoop.hdfs.server.namenode.FSDirectory.verifyParentDir(FSDirectory.java:1979)
at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startFileInternal(FSNamesystem.java:2739)
at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startFileInt(FSNamesystem.java:2671)
at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startFile(FSNamesystem.java:2555)
at org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.create(NameNodeRpcServer.java:735)
at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.create(ClientNamenodeProtocolServerSideTranslatorPB.java:408)
at org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java)
at org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:640)
at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:982)
at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2351)
at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2347)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1866)
at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2345)
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
at org.apache.hadoop.ipc.RemoteException.instantiateException(RemoteException.java:106)
at org.apache.hadoop.ipc.RemoteException.unwrapRemoteException(RemoteException.java:73)
at org.apache.hadoop.hdfs.DFSOutputStream.newStreamForCreate(DFSOutputStream.java:1804)
at org.apache.hadoop.hdfs.DFSClient.create(DFSClient.java:1701)
at org.apache.hadoop.hdfs.DFSClient.create(DFSClient.java:1670)
at org.apache.hadoop.hdfs.DistributedFileSystem$9.doCall(DistributedFileSystem.java:527)
at org.apache.hadoop.hdfs.DistributedFileSystem$9.doCall(DistributedFileSystem.java:523)
at org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81)
at org.apache.hadoop.hdfs.DistributedFileSystem.createNonRecursive(DistributedFileSystem.java:523)
at org.apache.hadoop.fs.FileSystem.createNonRecursive(FileSystem.java:1134)
at org.apache.hadoop.fs.FileSystem.createNonRecursive(FileSystem.java:1110)
at org.apache.hadoop.hbase.regionserver.wal.ProtobufLogWriter.init(ProtobufLogWriter.java:90)
at org.apache.hadoop.hbase.wal.DefaultWALProvider.createWriter(DefaultWALProvider.java:363)
... 4 more
Caused by: org.apache.hadoop.ipc.RemoteException(java.io.FileNotFoundException): Parent directory doesn't exist: /hbase/WALs/slave6.bigdata.test.cn,16020,1507703889028
at org.apache.hadoop.hdfs.server.namenode.FSDirectory.verifyParentDir(FSDirectory.java:1979)
at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startFileInternal(FSNamesystem.java:2739)
at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startFileInt(FSNamesystem.java:2671)
at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startFile(FSNamesystem.java:2555)
at org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.create(NameNodeRpcServer.java:735)
at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.create(ClientNamenodeProtocolServerSideTranslatorPB.java:408)
at org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java)
at org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:640)
at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:982)
at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2351)
at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2347)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1866)
at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2345)
at org.apache.hadoop.ipc.Client.getRpcResponse(Client.java:1554)
at org.apache.hadoop.ipc.Client.call(Client.java:1498)
at org.apache.hadoop.ipc.Client.call(Client.java:1398)
at org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:233)
at com.sun.proxy.$Proxy16.create(Unknown Source)
at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolTranslatorPB.create(ClientNamenodeProtocolTranslatorPB.java:313)
at sun.reflect.GeneratedMethodAccessor4.invoke(Unknown Source)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:291)
at org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:203)
at org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:185)
at com.sun.proxy.$Proxy17.create(Unknown Source)
at sun.reflect.GeneratedMethodAccessor4.invoke(Unknown Source)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.hadoop.hbase.fs.HFileSystem$1.invoke(HFileSystem.java:283)
at com.sun.proxy.$Proxy18.create(Unknown Source)
at org.apache.hadoop.hdfs.DFSOutputStream.newStreamForCreate(DFSOutputStream.java:1799)
... 14 more
2018-01-20T03:26:19 Region server slave6.bigdata.test.cn,16020,1507703889028 reported a fatal error:
ABORTING region server slave6.bigdata.test.cn,16020,1507703889028: Get list of registered region servers
Cause:
org.apache.zookeeper.KeeperException$SessionExpiredException: KeeperErrorCode = Session expired for /hbase-unsecure/rs
at org.apache.zookeeper.KeeperException.create(KeeperException.java:127)
at org.apache.zookeeper.KeeperException.create(KeeperException.java:51)
at org.apache.zookeeper.ZooKeeper.getChildren(ZooKeeper.java:1472)
at org.apache.hadoop.hbase.zookeeper.RecoverableZooKeeper.getChildren(RecoverableZooKeeper.java:292)
at org.apache.hadoop.hbase.zookeeper.ZKUtil.listChildrenAndWatchForNewChildren(ZKUtil.java:455)
at org.apache.hadoop.hbase.zookeeper.ZKUtil.listChildrenAndWatchThem(ZKUtil.java:483)
at org.apache.hadoop.hbase.replication.ReplicationTrackerZKImpl.getRegisteredRegionServers(ReplicationTrackerZKImpl.java:245)
at org.apache.hadoop.hbase.replication.ReplicationTrackerZKImpl.refreshOtherRegionServersList(ReplicationTrackerZKImpl.java:226)
at org.apache.hadoop.hbase.replication.ReplicationTrackerZKImpl.access$400(ReplicationTrackerZKImpl.java:42)
at org.apache.hadoop.hbase.replication.ReplicationTrackerZKImpl$OtherRegionServerWatcher.refreshListIfRightPath(ReplicationTrackerZKImpl.java:142)
at org.apache.hadoop.hbase.replication.ReplicationTrackerZKImpl$OtherRegionServerWatcher.nodeDeleted(ReplicationTrackerZKImpl.java:117)
at org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher.process(ZooKeeperWatcher.java:539)
at org.apache.zookeeper.ClientCnxn$EventThread.processEvent(ClientCnxn.java:534)
at org.apache.zookeeper.ClientCnxn$EventThread.run(ClientCnxn.java:510)
2018-01-20T03:26:19 Region server slave6.bigdata.test.cn,16020,1507703889028 reported a fatal error:
ABORTING region server slave6.bigdata.test.cn,16020,1507703889028: regionserver:16020-0x35ec011d31a0174, quorum=master1.bigdata.test.cn:2181,master3.bigdata.test.cn:2181,master2.bigdata.test.cn:2181, baseZNode=/hbase-unsecure regionserver:16020-0x35ec011d31a0174 received expired from ZooKeeper, aborting
Cause:
org.apache.zookeeper.KeeperException$SessionExpiredException: KeeperErrorCode = Session expired
at org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher.connectionEvent(ZooKeeperWatcher.java:592)
at org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher.process(ZooKeeperWatcher.java:524)
at org.apache.hadoop.hbase.zookeeper.PendingWatcher.process(PendingWatcher.java:40)
at org.apache.zookeeper.ClientCnxn$EventThread.processEvent(ClientCnxn.java:534)
at org.apache.zookeeper.ClientCnxn$EventThread.run(ClientCnxn.java:510)
2018-01-20T03:26:19 Region server slave6.bigdata.test.cn,16020,1507703889028 reported a fatal error:
ABORTING region server slave6.bigdata.test.cn,16020,1507703889028: regionserver:16020-0x35ec011d31a0174, quorum=master1.bigdata.test.cn:2181,master3.bigdata.test.cn:2181,master2.bigdata.test.cn:2181, baseZNode=/hbase-unsecure regionserver:16020-0x35ec011d31a0174 received expired from ZooKeeper, aborting
Cause:
org.apache.zookeeper.KeeperException$SessionExpiredException: KeeperErrorCode = Session expired
at org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher.connectionEvent(ZooKeeperWatcher.java:592)
at org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher.process(ZooKeeperWatcher.java:524)
at org.apache.zookeeper.ClientCnxn$EventThread.processEvent(ClientCnxn.java:534)
at org.apache.zookeeper.ClientCnxn$EventThread.run(ClientCnxn.java:510)