hadoop启动集群的时候namenode部分节点报如下问题:
IPC's epoch 8 is less than the last promised epoch 9 ; journal id: mycluster
at org.apache.hadoop.hdfs.qjournal.server.Journal.checkRequest(Journal.java:473)
at org.apache.hadoop.hdfs.qjournal.server.Journal.startLogSegment(Journal.java:558)
at org.apache.hadoop.hdfs.qjournal.server.JournalNodeRpcServer.startLogSegment(JournalNodeRpcServer.java:178)
at org.apache.hadoop.hdfs.qjournal.protocolPB.QJournalProtocolServerSideTranslatorPB.startLogSegment(QJournalProtocolServerSideTranslatorPB.java:188)
at org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos$QJournalProtocolService$2.callBlockingMethod(QJournalProtocolProtos.java:26743)
at org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:528)
at org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:498)
at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:1038)
at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:1003)
at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:931)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1938)
at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2855)at org.apache.hadoop.hdfs.qjournal.client.QuorumException.create(QuorumException.java:81)
at org.apache.hadoop.hdfs.qjournal.client.QuorumCall.rethrowException(QuorumCall.java:304)
at org.apache.hadoop.hdfs.qjournal.client.AsyncLoggerSet.waitForWriteQuorum(AsyncLoggerSet.java:143)
at org.apache.hadoop.hdfs.qjournal.client.QuorumJournalManager.startLogSegment(QuorumJournalManager.java:435)
at org.apache.hadoop.hdfs.server.namenode.JournalSet$JournalAndStream.startLogSegment(JournalSet.java:108)
at org.apache.hadoop.hdfs.server.namenode.JournalSet$3.apply(JournalSet.java:225)
at org.apache.hadoop.hdfs.server.namenode.JournalSet.mapJournalsAndReportErrors(JournalSet.java:400)
at org.apache.hadoop.hdfs.server.namenode.JournalSet.startLogSegment(JournalSet.java:222)
at org.apache.hadoop.hdfs.server.namenode.FSEditLog.startLogSegment(FSEditLog.java:1336)
at org.apache.hadoop.hdfs.server.namenode.FSEditLog.openForWrite(FSEditLog.java:335)
at org.apache.hadoop.hdfs.server.namenode.FSEditLogAsync.openForWrite(FSEditLogAsync.java:103)
at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startActiveServices(FSNamesystem.java:1230)
at org.apache.hadoop.hdfs.server.namenode.NameNode$NameNodeHAContext.startActiveServices(NameNode.java:1979)
at org.apache.hadoop.hdfs.server.namenode.ha.ActiveState.enterState(ActiveState.java:61)
at org.apache.hadoop.hdfs.server.namenode.ha.HAState.setStateInternal(HAState.java:64)
at org.apache.hadoop.hdfs.server.namenode.ha.StandbyState.setState(StandbyState.java:60)
at org.apache.hadoop.hdfs.server.namenode.NameNode.transitionToActive(NameNode.java:1823)
at org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.transitionToActive(NameNodeRpcServer.java:1714)
at org.apache.hadoop.ha.protocolPB.HAServiceProtocolServerSideTranslatorPB.transitionToActive(HAServiceProtocolServerSideTranslatorPB.java:112)
at org.apache.hadoop.ha.proto.HAServiceProtocolProtos$HAServiceProtocolService$2.callBlockingMethod(HAServiceProtocolProtos.java:5409)
at org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:528)
at org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:498)
at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:1038)
at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:1003)
at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:931)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1938)
at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2855)
解决 在hadoop的配置文件core-site.xml中新增如下配置后重启即可解决
ha.health-monitor.rpc-timeout.ms
300000