20,807
社区成员
发帖
与我相关
我的任务
分享
主机名 IP 安装的软件 运行的进程
hadoop1 192.168.1.181 jdk、hadoop NameNode、DFSZKFailoverController(zkfc)
hadoop2 192.168.1.182 jdk、hadoop NameNode、DFSZKFailoverController(zkfc)
hadoop3 192.168.1.183 jdk、hadoop ResourceManager
hadoop4 192.168.1.184 jdk、hadoop ResourceManager
hadoop5 192.168.1.185 jdk、hadoop、zookeeper DataNode、NodeManager、JournalNode、QuorumPeerMain
hadoop6 192.168.1.186 jdk、hadoop、zookeeper DataNode、NodeManager、JournalNode、QuorumPeerMain
hadoop7 192.168.1.187 jdk、hadoop、zookeeper DataNode、NodeManager、JournalNode、QuorumPeerMain
我想配置两个ResourceManager节点
结果在hadoop1使用 start-all.sh 后发现 hadoop1,hadoop2 的NameNode,还有hadoop3,hadoop7 的ResourceManager节点没有起来,其他进程正常
日志错误
2015-09-10 11:04:47,411 FATAL org.apache.hadoop.hdfs.server.namenode.FSEditLog: Error: recoverUnfinalizedSegments failed for required journal (JournalAndStream(mgr=QJM to [192.168.1.184:8485, 192.168.1.185:8485, 192.168.1.186:8485], stream=null))
org.apache.hadoop.hdfs.qjournal.client.QuorumException: Got too many exceptions to achieve quorum size 2/3. 3 exceptions thrown:
192.168.1.185:8485: Call From hadoop1/192.168.1.181 to hadoop5:8485 failed on connection exception: java.net.ConnectException: 拒绝连接; For more details see: http://wiki.apache.org/hadoop/ConnectionRefused
192.168.1.186:8485: Call From hadoop1/192.168.1.181 to hadoop6:8485 failed on connection exception: java.net.ConnectException: 拒绝连接; For more details see: http://wiki.apache.org/hadoop/ConnectionRefused
192.168.1.184:8485: Call From hadoop1/192.168.1.181 to hadoop4:8485 failed on connection exception: java.net.ConnectException: 拒绝连接; For more details see: http://wiki.apache.org/hadoop/ConnectionRefused
at org.apache.hadoop.hdfs.qjournal.client.QuorumException.create(QuorumException.java:81)
at org.apache.hadoop.hdfs.qjournal.client.QuorumCall.rethrowException(QuorumCall.java:223)
at org.apache.hadoop.hdfs.qjournal.client.AsyncLoggerSet.waitForWriteQuorum(AsyncLoggerSet.java:142)
at org.apache.hadoop.hdfs.qjournal.client.QuorumJournalManager.createNewUniqueEpoch(QuorumJournalManager.java:182)
at org.apache.hadoop.hdfs.qjournal.client.QuorumJournalManager.recoverUnfinalizedSegments(QuorumJournalManager.java:436)
at org.apache.hadoop.hdfs.server.namenode.JournalSet$8.apply(JournalSet.java:624)
at org.apache.hadoop.hdfs.server.namenode.JournalSet.mapJournalsAndReportErrors(JournalSet.java:393)
at org.apache.hadoop.hdfs.server.namenode.JournalSet.recoverUnfinalizedSegments(JournalSet.java:621)
at org.apache.hadoop.hdfs.server.namenode.FSEditLog.recoverUnclosedStreams(FSEditLog.java:1439)
at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startActiveServices(FSNamesystem.java:1112)
at org.apache.hadoop.hdfs.server.namenode.NameNode$NameNodeHAContext.startActiveServices(NameNode.java:1710)
at org.apache.hadoop.hdfs.server.namenode.ha.ActiveState.enterState(ActiveState.java:61)
at org.apache.hadoop.hdfs.server.namenode.ha.HAState.setStateInternal(HAState.java:64)
at org.apache.hadoop.hdfs.server.namenode.ha.StandbyState.setState(StandbyState.java:49)
at org.apache.hadoop.hdfs.server.namenode.NameNode.transitionToActive(NameNode.java:1583)
at org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.transitionToActive(NameNodeRpcServer.java:1472)
at org.apache.hadoop.ha.protocolPB.HAServiceProtocolServerSideTranslatorPB.transitionToActive(HAServiceProtocolServerSideTranslatorPB.java:107)
at org.apache.hadoop.ha.proto.HAServiceProtocolProtos$HAServiceProtocolService$2.callBlockingMethod(HAServiceProtocolProtos.java:4460)
at org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:616)
at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:969)
at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2049)
at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2045)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:415)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1657)
at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2043)
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>dfs.nameservices</name>
<value>ns1</value>
</property>
<!-- ns1下面有两个NameNode,分别是nn1,nn2 -->
<property>
<name>dfs.ha.namenodes.ns1</name>
<value>nn1,nn2</value>
</property>
<!-- nn1的RPC通信地址 -->
<property>
<name>dfs.namenode.rpc-address.ns1.nn1</name>
<value>192.168.1.181:9000</value>
</property>
<!-- nn1的http通信地址 -->
<property>
<name>dfs.namenode.http-address.ns1.nn1</name>
<value>192.168.1.181:50070</value>
</property>
<!-- nn2的RPC通信地址 -->
<property>
<name>dfs.namenode.rpc-address.ns1.nn2</name>
<value>192.168.1.182:9000</value>
</property>
<!-- nn2的http通信地址 -->
<property>
<name>dfs.namenode.http-address.ns1.nn2</name>
<value>192.168.1.182:50070</value>
</property>
<!-- 指定NameNode的元数据在JournalNode上的存放位置 依赖ZK-->
<property>
<name>dfs.namenode.shared.edits.dir</name>
<value>qjournal://192.168.1.184:8485;192.168.1.185:8485;192.168.1.186:8485/ns1</value>
</property>
<!-- 指定JournalNode在本地磁盘存放数据的位置 -->
<property>
<name>dfs.journalnode.edits.dir</name>
<value>/usr/local/hadoop/hadoop-2.7.1/journal</value>
</property>
<!-- 开启NameNode失败自动切换 -->
<property>
<name>dfs.ha.automatic-failover.enabled</name>
<value>true</value>
</property>
<!-- 配置失败自动切换实现类 -->
<property>
<name>dfs.client.failover.proxy.provider.ns1</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
<!-- 配置隔离机制方法,多个机制用换行分割,每个机制占用一行-->
<property>
<name>dfs.ha.fencing.methods</name>
<value>
sshfence
shell(/bin/true)
</value>
</property>
<!-- 使用sshfence隔离机制时需要ssh免登陆 -->
<property>
<name>dfs.ha.fencing.ssh.private-key-files</name>
<value>/root/.ssh/id_rsa</value>
</property>
<!-- 配置sshfence隔离机制超时时间 20秒 -->
<property>
<name>dfs.ha.fencing.ssh.connect-timeout</name>
<value>20000</value>
</property>
</configuration>
<?xml version="1.0"?>
<configuration>
<!-- 开启RM高可靠 -->
<property>
<name>yarn.resourcemanager.ha.enabled</name>
<value>true</value>
</property>
<!-- 指定RM的cluster id -->
<property>
<name>yarn.resourcemanager.cluster-id</name>
<value>cluster1</value>
</property>
<!-- 指定RM的名字 -->
<property>
<name>yarn.resourcemanager.ha.rm-ids</name>
<value>rm1,rm2</value>
</property>
<!-- 分别指定RM的地址 -->
<property>
<name>yarn.resourcemanager.hostname.rm1</name>
<value>192.168.1.183</value>
</property>
<property>
<name>yarn.resourcemanager.hostname.rm2</name>
<value>192.168.1.187</value>
</property>
<!-- 指定zk集群地址 -->
<property>
<name>yarn.resourcemanager.zk-address</name>
<value>192.168.1.184:2181,192.168.1.185:2181,192.168.1.186:2181</value>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
</configuration>
tickTime=2000
initLimit=10
syncLimit=5
dataDir=/usr/local/zookeeper/zookeeper-3.4.6/data
dataLogDir=/usr/local/zookeeper/zookeeper-3.4.6/log
clientPort=2181
# pc
server.1=192.168.1.184:2888:3888
server.2=192.168.1.185:2888:3888
server.3=192.168.1.186:2888:3888
//我所有节点这是这样配置的
//我觉得应该是这个配置不对,大神们指点下
192.168.1.184
192.168.1.185
192.168.1.186