Preparation
Install three CentOS 7 servers
Set the hostnames to hd01, hd02 and hd03
Give each machine a static network configuration
Disable the firewall
hostnamectl set-hostname hd01                    # hd02 / hd03 on the other two machines
vim /etc/hosts                                   # map all three hostnames to their IPs
vim /etc/sysconfig/network-scripts/ifcfg-ens33   # static address; the three hosts' addresses end in 220, 230, 240
systemctl stop firewalld.service                 # stops it now, but it would start again on boot
systemctl disable firewalld.service              # so also prevent firewalld from starting on boot
systemctl restart network                        # repeat this block on each of the three machines
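For reference, a minimal static configuration for hd01 might look like the sketch below. The 192.168.56.0/24 subnet, gateway and DNS values are assumptions for illustration (substitute your own network); only the final octets 220/230/240 come from these notes.
================
# /etc/sysconfig/network-scripts/ifcfg-ens33 (hd01; adjust IPADDR on hd02/hd03)
TYPE=Ethernet
BOOTPROTO=static
NAME=ens33
DEVICE=ens33
ONBOOT=yes
IPADDR=192.168.56.220
NETMASK=255.255.255.0
GATEWAY=192.168.56.2
DNS1=114.114.114.114

# /etc/hosts (identical on all three machines)
192.168.56.220 hd01
192.168.56.230 hd02
192.168.56.240 hd03
================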
# On hd01, generate a passwordless RSA key pair
ssh-keygen -t rsa -P ''
# Copy the public key to hd01, hd02 and hd03 (answer yes and enter the password at each prompt)
ssh-copy-id root@hd01
ssh-copy-id root@hd02
ssh-copy-id root@hd03
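To confirm that the passwordless login actually works, each of the following should print the remote hostname without asking for a password (a quick check, not part of the original notes):
ssh hd02 hostname
ssh hd03 hostname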
# Install chrony
yum -y install chrony
# Configure chrony: comment out the "server 0.centos.pool.ntp.org iburst" lines
# and point at the aliyun NTP servers instead
vim /etc/chrony.conf
server ntp1.aliyun.com
server ntp2.aliyun.com
server ntp3.aliyun.com
# Check the time
date
# Start chrony
systemctl start chronyd
# 7. Install wget
yum install -y wget
# 8. Install psmisc (a Linux command toolkit; it is needed during NameNode active/standby
#    failover because the sshfence fencing method calls its fuser command. It only has to
#    be installed on the two NameNode nodes, hd01 and hd02.)
yum install -y psmisc
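Before moving on, it is worth confirming that chrony is really syncing. chrony ships with the chronyc client, so a quick verification (added here, not in the original notes) is:
systemctl enable chronyd    # also have chronyd start on boot
chronyc sources             # lists the aliyun servers; the one currently synced to is marked with ^*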
cd /opt/
ls
tar -zxf zookeeper-3.4.5-cdh5.14.2.tar.gz
mv zookeeper-3.4.5-cdh5.14.2 soft/zk345
cd soft/zk345/conf/
cp zoo_sample.cfg zoo.cfg
vim zoo.cfg
================1
# Change the data directory
dataDir=/opt/soft/zk345/data
# One entry per node; the first port carries follower-to-leader traffic,
# the second port is used for leader election
server.1=hd01:2888:3888
server.2=hd02:2888:3888
server.3=hd03:2888:3888
================1
cd ..
mkdir data
cd data/
echo "1" > myid
cat myid                  # prints 1: the id is in place
cd ..
cd ..
ls
scp -r zk345/ root@hd02:/opt/soft/
scp -r zk345/ root@hd03:/opt/soft/

# In the hd02 window
cd /opt/soft/
ls
vim zk345/data/myid       # change the 1 to 2

# In the hd03 window
cd /opt/soft/
ls
vim zk345/data/myid       # change the 1 to 3

# Then set the environment variables on all three nodes (keep the three in sync)
vim /etc/profile
#ZK ENV
export ZOOKEEPER_HOME=/opt/soft/zk345
export PATH=$PATH:$ZOOKEEPER_HOME/bin
# Reload the profile
source /etc/profile
zkServer.sh start
jps
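After zkServer.sh start has been run on all three nodes, each node can be asked for its role; exactly one should answer "leader" and the other two "follower", and jps should show a QuorumPeerMain process on every node (a sanity check, not in the original notes):
zkServer.sh status    # run on hd01, hd02 and hd03 in turn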
Install the Hadoop cluster (do this on hd01)
cd /opt/
ls
tar -zxf had                 # tarball name abbreviated in the original notes (the Hadoop 2.6.0 CDH tarball)
mv had soft/hadoop260
cd soft/hadoop260
mkdir tmp                    # matches hadoop.tmp.dir below
mkdir -p dfs/journalnode_data
mkdir -p dfs/edits
mkdir -p dfs/datanode_data
mkdir -p dfs/namenode_data
ls
cd dfs/
ls
2. Configure hadoop-env.sh
vim etc/hadoop/hadoop-env.sh
=======================1
JAVA_HOME=/opt/soft/jdk180
HADOOP_CONF_DIR=/opt/soft/hadoop260/etc/hadoop
:wq
=======================1
3. Configure core-site.xml (keep the file free of Chinese comments; they can cause parse errors)
vim etc/hadoop/core-site.xml
=======================2
<configuration>
  <property><name>fs.defaultFS</name><value>hdfs://hacluster</value></property>
  <property><name>hadoop.tmp.dir</name><value>file:///opt/soft/hadoop260/tmp</value></property>
  <property><name>io.file.buffer.size</name><value>4096</value></property>
  <property><name>ha.zookeeper.quorum</name><value>hd01:2181,hd02:2181,hd03:2181</value></property>
  <property><name>hadoop.proxyuser.root.hosts</name><value>*</value></property>
  <property><name>hadoop.proxyuser.root.groups</name><value>*</value></property>
</configuration>
:wq
=======================2
4. Configure hdfs-site.xml
vim etc/hadoop/hdfs-site.xml
=======================3
<configuration>
  <property><name>dfs.block.size</name><value>134217728</value></property>
  <property><name>dfs.replication</name><value>3</value></property>
  <property><name>dfs.name.dir</name><value>file:///opt/soft/hadoop260/dfs/namenode_data</value></property>
  <property><name>dfs.data.dir</name><value>file:///opt/soft/hadoop260/dfs/datanode_data</value></property>
  <property><name>dfs.webhdfs.enabled</name><value>true</value></property>
  <property><name>dfs.datanode.max.transfer.threads</name><value>4096</value></property>
  <property><name>dfs.nameservices</name><value>hacluster</value></property>
  <property><name>dfs.ha.namenodes.hacluster</name><value>nn1,nn2</value></property>
  <property><name>dfs.namenode.rpc-address.hacluster.nn1</name><value>hd01:9000</value></property>
  <property><name>dfs.namenode.servicerpc-address.hacluster.nn1</name><value>hd01:53310</value></property>
  <property><name>dfs.namenode.http-address.hacluster.nn1</name><value>hd01:50070</value></property>
  <property><name>dfs.namenode.rpc-address.hacluster.nn2</name><value>hd02:9000</value></property>
  <property><name>dfs.namenode.servicerpc-address.hacluster.nn2</name><value>hd02:53310</value></property>
  <property><name>dfs.namenode.http-address.hacluster.nn2</name><value>hd02:50070</value></property>
  <property><name>dfs.namenode.shared.edits.dir</name><value>qjournal://hd01:8485;hd02:8485;hd03:8485/hacluster</value></property>
  <property><name>dfs.journalnode.edits.dir</name><value>/opt/soft/hadoop260/dfs/journalnode_data</value></property>
  <property><name>dfs.namenode.edits.dir</name><value>/opt/soft/hadoop260/dfs/edits</value></property>
  <property><name>dfs.ha.automatic-failover.enabled</name><value>true</value></property>
  <property><name>dfs.client.failover.proxy.provider.hacluster</name><value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value></property>
  <property><name>dfs.ha.fencing.methods</name><value>sshfence</value></property>
  <property><name>dfs.ha.fencing.ssh.private-key-files</name><value>/root/.ssh/id_rsa</value></property>
  <property><name>dfs.permissions</name><value>false</value></property>
</configuration>
:wq
=======================3
5. Configure mapred-site.xml
cp etc/hadoop/mapred-site.xml.template etc/hadoop/mapred-site.xml
vim etc/hadoop/mapred-site.xml
=======================4
<configuration>
  <property><name>mapreduce.framework.name</name><value>yarn</value></property>
  <property><name>mapreduce.jobhistory.address</name><value>hd01:10020</value></property>
  <property><name>mapreduce.jobhistory.webapp.address</name><value>hd01:19888</value></property>
  <property><name>mapreduce.job.ubertask.enable</name><value>true</value></property>
</configuration>
:wq
=======================4
6. Configure yarn-site.xml
vim etc/hadoop/yarn-site.xml
=======================5
<configuration>
  <property><name>yarn.resourcemanager.ha.enabled</name><value>true</value></property>
  <property><name>yarn.resourcemanager.cluster-id</name><value>hayarn</value></property>
  <property><name>yarn.resourcemanager.ha.rm-ids</name><value>rm1,rm2</value></property>
  <property><name>yarn.resourcemanager.hostname.rm1</name><value>hd02</value></property>
  <property><name>yarn.resourcemanager.hostname.rm2</name><value>hd03</value></property>
  <property><name>yarn.resourcemanager.zk-address</name><value>hd01:2181,hd02:2181,hd03:2181</value></property>
  <property><name>yarn.resourcemanager.recovery.enabled</name><value>true</value></property>
  <property><name>yarn.resourcemanager.store.class</name><value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value></property>
  <property><name>yarn.resourcemanager.hostname</name><value>hd03</value></property>
  <property><name>yarn.nodemanager.aux-services</name><value>mapreduce_shuffle</value></property>
  <property><name>yarn.log-aggregation-enable</name><value>true</value></property>
  <property><name>yarn.log-aggregation.retain-seconds</name><value>604800</value></property>
</configuration>
:wq
=======================5
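A stray character in any of the *-site.xml files (Chinese comments included) makes the daemons die at startup with a parse error. If xmllint is available (on CentOS 7 it comes with the libxml2 package), a quick well-formedness check over all four files is possible (not in the original notes):
xmllint --noout /opt/soft/hadoop260/etc/hadoop/*-site.xml    # no output means every file parses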
7. Edit the cluster slaves file
vim etc/hadoop/slaves
# delete localhost, then list the three workers:
hd01
hd02
hd03
:wq
cd ..
scp -r hadoop260/ root@hd02:/opt/soft/
scp -r hadoop260/ root@hd03:/opt/soft/
#hadoop env (append to /etc/profile on all three nodes)
export HADOOP_HOME=/opt/soft/hadoop260
export HADOOP_MAPRED_HOME=$HADOOP_HOME
export HADOOP_COMMON_HOME=$HADOOP_HOME
export HADOOP_HDFS_HOME=$HADOOP_HOME
export YARN_HOME=$HADOOP_HOME
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
export PATH=$PATH:$HADOOP_HOME/sbin:$HADOOP_HOME/bin
export HADOOP_INSTALL=$HADOOP_HOME
# Reload the profile
source /etc/profile
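A quick check that the new PATH took effect (not in the original notes); the exact version string depends on the tarball unpacked above:
hadoop version    # should report the 2.6.0-cdh5.14.2 build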
zkServer.sh start    # ZooKeeper must be running on all three nodes before anything else
Problem encountered: "hadoop-deamon.sh: command not found" and "Could not find or load main class journode" (watch the spelling: the script is hadoop-daemon.sh and the class argument is journalnode!)
Fix:
hadoop-daemon.sh start journalnode    # run on all three JournalNode hosts (hd01, hd02, hd03)
jps                                   # JournalNode should now be listed
hdfs namenode -format    # format nn1 (run once, on hd01)
scp -r /opt/soft/hadoop260/dfs/namenode_data/current/ root@hd02:/opt/soft/hadoop260/dfs/namenode_data/    # copy the formatted metadata to the standby NameNode (nn2)
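The scp gives the standby NameNode a copy of the freshly formatted fsimage and edits. Hadoop also has a built-in command for this, hdfs namenode -bootstrapStandby, run on hd02 while nn1's NameNode process is up; these notes do the copy by hand instead, which works before any NameNode has been started.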
hdfs zkfc -formatZK    # initialize the HA state znode in ZooKeeper (run on hd01)
start-dfs.sh    # starts the NameNodes, DataNodes and ZKFailoverControllers
jps
start-yarn.sh    # starts the NodeManagers, plus a ResourceManager on the local node
jps
mr-jobhistory-daemon.sh start historyserver    # on hd01, as set in mapred-site.xml
yarn-daemon.sh start resourcemanager    # start-yarn.sh only starts the local ResourceManager; start the other one by hand on its own node
hdfs haadmin -getServiceState nn1
hdfs haadmin -getServiceState nn2    # one should report active, the other standby
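The same states are visible in the web UIs configured above: the NameNode UIs at http://hd01:50070 and http://hd02:50070 show active/standby in their headers, and the ResourceManager web UI listens on its default port, 8088.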
hadoop-daemon.sh start namenode    # start a single NameNode by hand, e.g. if one failed to come up (used again in the failover test below)
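With both NameNodes up, the automatic failover can be exercised. The sketch below is a test sequence added to these notes (not an original step); the host and role names follow the configuration above:
hdfs haadmin -getServiceState nn1    # suppose nn1 (hd01) is currently active
jps                                  # on hd01: note the NameNode pid
kill -9 <NameNode-pid>               # simulate a crash of the active NameNode
hdfs haadmin -getServiceState nn2    # within seconds the ZKFC should promote nn2 to active
hadoop-daemon.sh start namenode      # on hd01: bring nn1 back; it rejoins as standby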