• Hadoop Cluster Setup (Guaranteed to Succeed!!!)-----------MapReduce Principles Explained


    Hadoop high-availability cluster setup

    Preparation

    1. Install three CentOS 7 servers

    2. Set the hostnames to hd01, hd02 and hd03

    3. Configure static network addresses

    4. Disable the firewall

    hostnamectl set-hostname hd01               # use hd02 / hd03 on the other two nodes
    vim /etc/hosts                              # map the IPs of hd01, hd02 and hd03 on every node
    vim /etc/sysconfig/network-scripts/ifcfg-ens33

    systemctl stop firewalld.service            # stops the firewall now, but it starts again on boot
    systemctl disable firewalld.service         # keep firewalld from starting on boot
    systemctl restart network

    # configure the three hosts in turn (static addresses ending in 220, 230 and 240)
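    The two files edited above need roughly the following contents. The subnet, gateway and DNS shown here are assumptions (a typical NAT setup); keep only the pattern, with the three hosts ending in 220, 230 and 240 as noted above, and substitute the values for your own network:

    # /etc/sysconfig/network-scripts/ifcfg-ens33 on hd01 (use .230 on hd02, .240 on hd03)
    BOOTPROTO=static
    ONBOOT=yes
    IPADDR=192.168.64.220
    NETMASK=255.255.255.0
    GATEWAY=192.168.64.2
    DNS1=114.114.114.114

    # /etc/hosts (the same three lines on every node)
    192.168.64.220 hd01
    192.168.64.230 hd02
    192.168.64.240 hd03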

    Set up SSH keys (do this on all 3 nodes)

    # on hd01: generate an ssh key pair with an empty passphrase
    ssh-keygen -t rsa -P ''
    # copy the public key to hd01, hd02 and hd03 (answer "yes" and enter the root password when prompted)
    ssh-copy-id root@hd01
    ssh-copy-id root@hd02
    ssh-copy-id root@hd03
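    Passwordless login can be verified quickly from hd01 (an optional sanity check):

    ssh hd02 hostname    # should print hd02 without asking for a password
    ssh hd03 hostname    # should print hd03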

    Synchronize the time on all servers (do this on all 3 nodes)

    # install chrony
    yum -y install chrony
    # configure chrony: comment out the default "server X.centos.pool.ntp.org iburst" lines
    # and point it at the Aliyun NTP servers instead
    vim /etc/chrony.conf
    server ntp1.aliyun.com
    server ntp2.aliyun.com
    server ntp3.aliyun.com
    # start chrony
    systemctl start chronyd
    # check the time
    date
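    To confirm chrony is actually syncing against the Aliyun servers (an optional check; the exact output varies per machine):

    chronyc sources    # the source currently in use is marked with ^*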
    # install wget
    yum install -y wget
    # install psmisc (provides fuser, used during namenode failover fencing; only needed on the two namenode hosts hd01 and hd02)
    yum install -y psmisc

    Upload the installation archives!! (onto hd01 first)

    cd /opt/
    ls
    tar -zxf zookeeper-3.4.5-cdh5.14.2.tar.gz
    mv zookeeper-3.4.5-cdh5.14.2 soft/zk345
    cd soft/zk345/conf/
    cp zoo_sample.cfg zoo.cfg
    vim zoo.cfg
    ================1
    # change
    dataDir=/opt/soft/zk345/data
    # cluster servers: the first port is for data exchange with the leader, the second is for leader election
    server.1=hd01:2888:3888
    server.2=hd02:2888:3888
    server.3=hd03:2888:3888
    ================1
    cd ..
    mkdir data
    cd data/
    echo "1" > myid
    cat myid
    # if it prints 1, it worked!!!
    cd ..
    cd ..
    ls
    scp -r zk345/ root@hd02:/opt/soft/
    scp -r zk345/ root@hd03:/opt/soft/
    # switch to the hd02 window
    cd /opt/soft/
    ls
    vim zk345/data/myid
    # change the 1 to
    2
    # switch to the hd03 window
    cd /opt/soft/
    ls
    vim zk345/data/myid
    # change the 1 to
    3

    Update the environment variables (do this on all 3 nodes)

    vim /etc/profile
    #ZK ENV
    export ZOOKEEPER_HOME=/opt/soft/zk345
    export PATH=$PATH:$ZOOKEEPER_HOME/bin
    # apply
    source /etc/profile
    zkServer.sh start
    jps
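    After starting ZooKeeper on all three nodes, a quick status check shows which node was elected leader (which one it is may differ on your machines):

    zkServer.sh status    # run on each node: expect one "Mode: leader" and two "Mode: follower"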

    Install the Hadoop cluster (on hd01)

    cd /opt/
    ls
    tar -zxf had
    mv had soft/hadoop260
    cd soft/hadoop260
    mkdir tmp
    mkdir -p dfs/journalnode_data
    mkdir -p dfs/edits
    mkdir -p dfs/datanode_data
    mkdir -p dfs/namenode_data
    ls
    cd dfs/
    ls
    ​
    2. Configure hadoop-env.sh
    vim etc/hadoop/hadoop-env.sh
    =======================1
    export JAVA_HOME=/opt/soft/jdk180
    export HADOOP_CONF_DIR=/opt/soft/hadoop260/etc/hadoop
    :wq
    =======================1
    #3. Configure core-site.xml
    vim etc/hadoop/core-site.xml
    =======================2
    
    
       
    <configuration>
      <!-- default file system: the HA nameservice -->
      <property>
        <name>fs.defaultFS</name>
        <value>hdfs://hacluster</value>
      </property>
      <!-- Hadoop temp directory -->
      <property>
        <name>hadoop.tmp.dir</name>
        <value>file:///opt/soft/hadoop260/tmp</value>
      </property>
      <!-- I/O buffer size -->
      <property>
        <name>io.file.buffer.size</name>
        <value>4096</value>
      </property>
      <!-- ZooKeeper quorum used for HA -->
      <property>
        <name>ha.zookeeper.quorum</name>
        <value>hd01:2181,hd02:2181,hd03:2181</value>
      </property>
      <!-- allow root to act as a proxy user from any host and for any group -->
      <property>
        <name>hadoop.proxyuser.root.hosts</name>
        <value>*</value>
      </property>
      <property>
        <name>hadoop.proxyuser.root.groups</name>
        <value>*</value>
      </property>
    </configuration>
    ​
    :wq
    =======================2
    #4. Configure hdfs-site.xml (avoid Chinese comments inside the XML; they can cause parse errors)
    vim etc/hadoop/hdfs-site.xml
    =======================3
     
         
         
    <configuration>
      <!-- HDFS block size: 128 MB -->
      <property>
        <name>dfs.block.size</name>
        <value>134217728</value>
      </property>
      <!-- number of replicas -->
      <property>
        <name>dfs.replication</name>
        <value>3</value>
      </property>
      <!-- namenode metadata directory -->
      <property>
        <name>dfs.name.dir</name>
        <value>file:///opt/soft/hadoop260/dfs/namenode_data</value>
      </property>
      <!-- datanode data directory -->
      <property>
        <name>dfs.data.dir</name>
        <value>file:///opt/soft/hadoop260/dfs/datanode_data</value>
      </property>
      <!-- enable WebHDFS -->
      <property>
        <name>dfs.webhdfs.enabled</name>
        <value>true</value>
      </property>
      <!-- max transfer threads per datanode -->
      <property>
        <name>dfs.datanode.max.transfer.threads</name>
        <value>4096</value>
      </property>
      <!-- logical name of the HA nameservice -->
      <property>
        <name>dfs.nameservices</name>
        <value>hacluster</value>
      </property>
      <!-- the two namenodes in the nameservice -->
      <property>
        <name>dfs.ha.namenodes.hacluster</name>
        <value>nn1,nn2</value>
      </property>
      <!-- nn1 (hd01) addresses -->
      <property>
        <name>dfs.namenode.rpc-address.hacluster.nn1</name>
        <value>hd01:9000</value>
      </property>
      <property>
        <name>dfs.namenode.servicerpc-address.hacluster.nn1</name>
        <value>hd01:53310</value>
      </property>
      <property>
        <name>dfs.namenode.http-address.hacluster.nn1</name>
        <value>hd01:50070</value>
      </property>
      <!-- nn2 (hd02) addresses -->
      <property>
        <name>dfs.namenode.rpc-address.hacluster.nn2</name>
        <value>hd02:9000</value>
      </property>
      <property>
        <name>dfs.namenode.servicerpc-address.hacluster.nn2</name>
        <value>hd02:53310</value>
      </property>
      <property>
        <name>dfs.namenode.http-address.hacluster.nn2</name>
        <value>hd02:50070</value>
      </property>
      <!-- shared edits on the JournalNode quorum -->
      <property>
        <name>dfs.namenode.shared.edits.dir</name>
        <value>qjournal://hd01:8485;hd02:8485;hd03:8485/hacluster</value>
      </property>
      <!-- local journalnode storage -->
      <property>
        <name>dfs.journalnode.edits.dir</name>
        <value>/opt/soft/hadoop260/dfs/journalnode_data</value>
      </property>
      <!-- namenode edits directory -->
      <property>
        <name>dfs.namenode.edits.dir</name>
        <value>/opt/soft/hadoop260/dfs/edits</value>
      </property>
      <!-- enable automatic failover -->
      <property>
        <name>dfs.ha.automatic-failover.enabled</name>
        <value>true</value>
      </property>
      <!-- client failover proxy provider -->
      <property>
        <name>dfs.client.failover.proxy.provider.hacluster</name>
        <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
      </property>
      <!-- fence the old active namenode over ssh -->
      <property>
        <name>dfs.ha.fencing.methods</name>
        <value>sshfence</value>
      </property>
      <property>
        <name>dfs.ha.fencing.ssh.private-key-files</name>
        <value>/root/.ssh/id_rsa</value>
      </property>
      <!-- disable HDFS permission checking -->
      <property>
        <name>dfs.permissions</name>
        <value>false</value>
      </property>
    </configuration>
    =======================3
    5. Configure mapred-site.xml
    cp etc/hadoop/mapred-site.xml.template etc/hadoop/mapred-site.xml
    vim etc/hadoop/mapred-site.xml
    =======================4
     
    
     
    <configuration>
      <!-- run MapReduce on YARN -->
      <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
      </property>
      <!-- job history server -->
      <property>
        <name>mapreduce.jobhistory.address</name>
        <value>hd01:10020</value>
      </property>
      <property>
        <name>mapreduce.jobhistory.webapp.address</name>
        <value>hd01:19888</value>
      </property>
      <!-- run small jobs as uber tasks inside the AM JVM -->
      <property>
        <name>mapreduce.job.ubertask.enable</name>
        <value>true</value>
      </property>
    </configuration>
    =======================4
    6. Configure yarn-site.xml
    vim etc/hadoop/yarn-site.xml
    =======================5
    
     
     
    <configuration>
      <!-- enable ResourceManager HA -->
      <property>
        <name>yarn.resourcemanager.ha.enabled</name>
        <value>true</value>
      </property>
      <!-- YARN HA cluster id -->
      <property>
        <name>yarn.resourcemanager.cluster-id</name>
        <value>hayarn</value>
      </property>
      <!-- the two resourcemanagers -->
      <property>
        <name>yarn.resourcemanager.ha.rm-ids</name>
        <value>rm1,rm2</value>
      </property>
      <property>
        <name>yarn.resourcemanager.hostname.rm1</name>
        <value>hd02</value>
      </property>
      <property>
        <name>yarn.resourcemanager.hostname.rm2</name>
        <value>hd03</value>
      </property>
      <!-- ZooKeeper quorum used by YARN HA -->
      <property>
        <name>yarn.resourcemanager.zk-address</name>
        <value>hd01:2181,hd02:2181,hd03:2181</value>
      </property>
      <!-- recover RM state after a restart -->
      <property>
        <name>yarn.resourcemanager.recovery.enabled</name>
        <value>true</value>
      </property>
      <property>
        <name>yarn.resourcemanager.store.class</name>
        <value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
      </property>
      <!-- default resourcemanager hostname -->
      <property>
        <name>yarn.resourcemanager.hostname</name>
        <value>hd03</value>
      </property>
      <!-- shuffle service for MapReduce -->
      <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
      </property>
      <!-- log aggregation, kept for 7 days -->
      <property>
        <name>yarn.log-aggregation-enable</name>
        <value>true</value>
      </property>
      <property>
        <name>yarn.log-aggregation.retain-seconds</name>
        <value>604800</value>
      </property>
    </configuration>
    =======================5

    Edit the cluster worker list (the slaves file)

    vim etc/hadoop/slaves
    # delete localhost and list the three nodes
    hd01
    hd02
    hd03
    :wq
    ​
    cd ..
    scp -r hadoop260/ root@hd02:/opt/soft/
    scp -r hadoop260/ root@hd03:/opt/soft/

    Start the cluster

    Configure the Hadoop environment variables on all 3 nodes (vi /etc/profile)

    #hadoop env
    export HADOOP_HOME=/opt/soft/hadoop260 
    export HADOOP_MAPRED_HOME=$HADOOP_HOME 
    export HADOOP_COMMON_HOME=$HADOOP_HOME 
    export HADOOP_HDFS_HOME=$HADOOP_HOME 
    export YARN_HOME=$HADOOP_HOME 
    export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native 
    export PATH=$PATH:$HADOOP_HOME/sbin:$HADOOP_HOME/bin 
    export HADOOP_INSTALL=$HADOOP_HOME
    ​
    # apply
    source /etc/profile

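    Optionally, confirm on each node that the new variables took effect and that the right installation is on the PATH:

    hadoop version    # should report the Hadoop build (2.6.0-cdh5.14.2 here) and a jar path under /opt/soft/hadoop260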
    Start ZooKeeper (on all 3 nodes)

    zkServer.sh start

    Start the JournalNodes (on all 3 nodes)

    Possible errors: "hadoop-deamon.sh: command not found" or "could not find or load main class journode" (check the spelling: the script is hadoop-daemon.sh and the service is journalnode!)

    Fix:

    hadoop-daemon.sh start journalnode
    jps
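    With ZooKeeper and the JournalNodes running, jps on each node should list roughly the following processes (process IDs omitted; QuorumPeerMain is the ZooKeeper process):

    # expected jps output at this point:
    #   QuorumPeerMain
    #   JournalNode
    #   Jps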

    Format the NameNode (on the hd01 host only)

    hdfs namenode -format

    Copy the NameNode metadata from hd01 to the same location on hd02

    scp -r /opt/soft/hadoop260/dfs/namenode_data/current/ root@hd02:/opt/soft/hadoop260/dfs/namenode_data/
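    As an alternative to copying the metadata with scp (not what this walkthrough does, but equivalent), the standby namenode can pull the formatted namespace itself; it would be run on hd02 while the JournalNodes are up:

    hdfs namenode -bootstrapStandby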

     

    Format the ZKFC failover controller (on hd01 or hd02)

    hdfs zkfc -formatZK

    Start the HDFS service on hd01

    start-dfs.sh 
    jps
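    If start-dfs.sh succeeded, jps on hd01 should show roughly the following (hd02 looks the same; hd03 has no NameNode or DFSZKFailoverController):

    #   NameNode
    #   DataNode
    #   JournalNode
    #   DFSZKFailoverController
    #   QuorumPeerMain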

    Start the YARN service on hd03

    start-yarn.sh 
    jps
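    After start-yarn.sh, jps on hd03 should additionally show the YARN daemons (hd01 and hd02 also get a NodeManager; the second ResourceManager on hd02 is started separately below):

    #   ResourceManager
    #   NodeManager
    #   DataNode
    #   JournalNode
    #   QuorumPeerMain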

    Start the JobHistory server on hd01

    mr-jobhistory-daemon.sh start historyserver 

    Start the ResourceManager on hd02

    yarn-daemon.sh start resourcemanager

    Check the state of NameNode nn1 (on hd01)

    hdfs haadmin -getServiceState nn1

    Check the state of NameNode nn2 (on hd02)

    hdfs haadmin -getServiceState nn2
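    One of the two namenodes should report active and the other standby; which one is active can vary, for example:

    hdfs haadmin -getServiceState nn1    # active
    hdfs haadmin -getServiceState nn2    # standby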

    Open the web UIs in a browser: hd01:50070 and hd02:50070 (one NameNode shows active, the other standby)

     

     

    Test: kill the NameNode process on hd01 and check whether nn2 on hd02 becomes active!!!
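    A minimal sketch of the failover test, run on hd01 (the pid is whatever jps reports for the NameNode process on your machine):

    jps                                   # note the NameNode pid
    kill -9 <NameNode pid>                # simulate a namenode crash
    hdfs haadmin -getServiceState nn2     # should report active after a few seconds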

     

    Check the browser again! (Since the process was killed above, hd01:50070 should no longer be reachable.)

    Restart the NameNode on hd01

    hadoop-daemon.sh start namenode
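    After the restart, nn1 rejoins as the standby (assuming nn2 took over during the test above); this can be confirmed with:

    hdfs haadmin -getServiceState nn1    # now reports standby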

     

     

  • Original article: https://blog.csdn.net/just_learing/article/details/126164192