1、Check the ip/hostname mapping in /etc/hosts
20.0.0.1 hostname          (in these notes: 20.0.0.180 singlehenry)
ip addr        # confirm the ip
hostname       # confirm the hostname
2、Passwordless ssh to the local machine
cd ~
ssh-keygen -t rsa
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
ls .ssh/
id_rsa  id_rsa.pub  authorized_keys  [known_hosts]
ssh root@singlehenry       # should log in without a password prompt
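If the login still asks for a password, the usual cause is key-file permissions; a common fix (not from the original notes):
chmod 700 ~/.ssh
chmod 600 ~/.ssh/authorized_keys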
3、Untar and rename
tar -zxvf /opt/download/hadoop-3.1.3.tar.gz -C /opt/software
mv /opt/software/hadoop-3.1.3 /opt/software/hadoop313
4、Configure environment variables
vi /etc/profile.d/myenv.sh
export HADOOP_HOME=/opt/software/hadoop313
export PATH=$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$PATH
export HDFS_NAMENODE_USER=root
export HDFS_DATANODE_USER=root
export HDFS_SECONDARYNAMENODE_USER=root
export YARN_RESOURCEMANAGER_USER=root
export YARN_NODEMANAGER_USER=root
# runtime resource paths
export HADOOP_MAPRED_HOME=$HADOOP_HOME
export HADOOP_COMMON_HOME=$HADOOP_HOME
export HADOOP_HDFS_HOME=$HADOOP_HOME
export HADOOP_YARN_HOME=$HADOOP_HOME
export HADOOP_INSTALL=$HADOOP_HOME
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
export HADOOP_LIBEXEC_DIR=$HADOOP_HOME/libexec
export JAVA_LIBRARY_PATH=$HADOOP_HOME/lib/native:$JAVA_LIBRARY_PATH
Apply the environment variables
source /etc/profile
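A quick check that the variables took effect (assuming the paths above):
echo $HADOOP_HOME      # /opt/software/hadoop313
hadoop version         # should report Hadoop 3.1.3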
5、Edit the configuration files in $HADOOP_HOME/etc/hadoop
hadoop-env.sh
export JAVA_HOME=/opt/software/jdk
core-site.xml
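The notes do not show the core-site.xml body; a minimal single-node sketch consistent with the rest of these notes (the NameNode port 9000 is an assumption; hadoop.tmp.dir matches the data directory deleted before re-formatting in step 6):
<configuration>
  <property><name>fs.defaultFS</name><value>hdfs://singlehenry:9000</value></property><!-- port is an assumption -->
  <property><name>hadoop.tmp.dir</name><value>/opt/software/hadoop313/data</value></property>
</configuration>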
hdfs-site.xml
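Likewise, the hdfs-site.xml body is not shown; for a single node a replication factor of 1 is the usual (assumed) setting:
<configuration>
  <property><name>dfs.replication</name><value>1</value></property><!-- single node, no replicas -->
</configuration>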
yarn-site.xml
<configuration>
  <property><name>yarn.resourcemanager.address</name><value>singlehenry:8032</value></property>
  <property><name>yarn.resourcemanager.scheduler.address</name><value>singlehenry:8030</value></property>
  <property><name>yarn.resourcemanager.resource-tracker.address</name><value>singlehenry:8031</value></property>
  <property><name>yarn.app.mapreduce.am.resource.mb</name><value>256</value></property>
  <property><name>yarn.application.classpath</name><value>/opt/software/hadoop313/etc/hadoop:/opt/software/hadoop313/share/hadoop/common/lib/*:/opt/software/hadoop313/share/hadoop/common/*:/opt/software/hadoop313/share/hadoop/hdfs:/opt/software/hadoop313/share/hadoop/hdfs/lib/*:/opt/software/hadoop313/share/hadoop/hdfs/*:/opt/software/hadoop313/share/hadoop/mapreduce/lib/*:/opt/software/hadoop313/share/hadoop/mapreduce/*:/opt/software/hadoop313/share/hadoop/yarn:/opt/software/hadoop313/share/hadoop/yarn/lib/*:/opt/software/hadoop313/share/hadoop/yarn/*</value></property>
</configuration>
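Not in the original notes, but usually also required so that the MapReduce shuffle service runs on the NodeManager (add inside the <configuration> block above):
  <property><name>yarn.nodemanager.aux-services</name><value>mapreduce_shuffle</value></property>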
mapred-site.xml
#JVM memory tuning (units: k/m/g)
#-Xmx<C>(m)  maximum JVM heap size
#  default: 1/2 of physical memory when physical memory < 1g, otherwise 1/4
#-Xms<C>(m)  initial JVM heap size; may be set equal to -Xmx to avoid re-allocating memory after GC
#  if unset: young generation + old generation
#  >= 8m; otherwise 1/64 of physical memory, at most 1g
#-Xmn<Y>(g)  young generation size (JVM heap = young gen (eden & 2*survivor) Yg + old gen (Cg-Yg-64m) + permanent gen 64m)
#  the larger the young gen, the smaller the old gen; a bad setting badly hurts JVM performance
#  Sun officially recommends a young gen of 3/8 of the heap
#-Xss<N>(k)  per-thread stack size
#  JDK default (< 5: 256k, >= 5: 1m)
#  the smaller each thread stack, the more concurrent threads are possible (2000~5000)
#-XX:NewRatio=4  young : old = 1 : 4, i.e. the young gen takes 1/5 of the heap
#  default is 2: young : old = 1 : 2, the young gen takes 1/3 of the heap (its maximum share)
#-XX:SurvivorRatio=4  eden : 2*survivor = 4 : 2, i.e. eden : one survivor = 4 : 1 (eden is 4/6 of the young gen)
#  default is 8: eden : 2*survivor = 8 : 2, eden : one survivor = 8 : 1
#-XX:MaxPermSize=16m  permanent generation size
#  default is 64m
#-XX:MaxTenuringThreshold=n
#  0: new objects skip the survivor spaces and go straight into the old gen; suits old-gen-heavy applications
#  a larger value keeps objects bouncing between the survivor spaces longer, so they stay in the young gen longer and are more likely to be collected there
#garbage collection settings
#-XX:+UseParallelGC
#  use the throughput-oriented parallel collector for the young generation
#-XX:ParallelGCThreads=4
#  number of parallel GC threads, usually the number of cores
#-XX:+UseParallelOldGC
#  use the throughput-oriented parallel collector for the old generation
#-XX:MaxGCPauseMillis=100
#  maximum pause for a single young-generation collection; the JVM resizes the young gen automatically to meet it
#-XX:+UseAdaptiveSizePolicy
#  recommended to keep enabled when using the parallel collector
#  automatically adjusts the new/old ratio to meet pause-time or collection-frequency goals
<configuration>
  <property><name>mapreduce.map.memory.mb</name><value>1024</value></property>
  <property><name>mapreduce.map.java.opts</name><value>-Xmx1024M -Xms128M</value></property>
  <property><name>mapreduce.reduce.memory.mb</name><value>2048</value></property>
  <property><name>mapreduce.reduce.java.opts</name><value>-Xmx2048M -Xms256M</value></property>
</configuration>
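One property the notes do not list but that is normally required for jobs to run on YARN rather than in local mode (an addition, add inside the <configuration> block above):
  <property><name>mapreduce.framework.name</name><value>yarn</value></property>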
6、Format the NameNode
cd /opt/software/hadoop313
#Note: if this is not the first format, everything under the directory configured as hadoop.tmp.dir in core-site.xml must be deleted first
rm -rf /opt/software/hadoop313/data
./bin/hdfs namenode -format
7、Start the services
./sbin/start-all.sh        # = start-dfs.sh + start-yarn.sh
./sbin/mr-jobhistory-daemon.sh start historyserver
jps
---------------------------------
2035 DataNode
2262 SecondaryNameNode
2552 ResourceManager
2696 NodeManager
1913 NameNode
4509 JobHistoryServer
---------------------------------
If anything fails, check …hadoop313/logs/hadoop-root-[namenode/datanode/resourcemanager/nodemanager].log first
tail -30 hadoop-root-namenode-singlehenry.log
8、Web UIs
20.0.0.180:9870 -> NameNode (HDFS)
20.0.0.180:8088 -> YARN ResourceManager
20.0.0.180:19888 -> JobHistory server started above (default port)
9、Access from the Linux command line
hdfs dfs -ls /
hdfs dfs -mkdir -p /kb12/hd/cha01
hdfs dfs -put a.txt /kb12/hd/cha01
hdfs dfs -cat /kb12/hd/cha01/*
10、Accessing Hadoop from Java
#1、Prepare the Hadoop environment on Windows
hadoop.dll -> windows/system32
#2、Package the Linux-side hadoop install
cd /opt/software
tar -zcvf hadoop313.tar.gz hadoop313/
#copy the archive to Windows with xftp or sz
#extract the archive to d:/software on Windows
#if extraction fails: search for winrar in the start menu -> right-click, run as administrator -> extract the archive to the target path
#3、Copy winutils.exe into the Windows hadoop313/bin
winutils.exe -> hadoop313/bin
#4、Configure the Windows environment variables for the migrated hadoop
HADOOP_HOME=D:\software\hadoop313
Path : %HADOOP_HOME%\bin;%HADOOP_HOME%\sbin;…
HADOOP_USER_NAME=root
#5、Map the Linux hostname and ip in the Windows hosts file
windows/System32/drivers/etc/hosts
20.0.0.180 singlehenry
#6、Leave safe mode
hdfs dfsadmin -safemode leave
#7、Run a bundled example
hadoop jar hadoop-mapreduce-examples-3.1.3.jar wordcount /input/wc01.log /output    # jar is under share/hadoop/mapreduce/ in the 3.1.3 install
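The notes for this step stop at environment preparation, so here is a minimal Java sketch of accessing HDFS from Windows. It assumes the hadoop-client 3.1.3 dependency is on the classpath and that fs.defaultFS is hdfs://singlehenry:9000 (the port is an assumption, see the core-site.xml sketch in step 5); the paths are illustrative.

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class HdfsDemo {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // connect as root, matching HADOOP_USER_NAME above; port 9000 is an assumption
        FileSystem fs = FileSystem.get(URI.create("hdfs://singlehenry:9000"), conf, "root");
        // create a directory and upload a local file (illustrative paths)
        fs.mkdirs(new Path("/kb12/hd/cha01"));
        fs.copyFromLocalFile(new Path("D:/data/a.txt"), new Path("/kb12/hd/cha01/"));
        // list the directory contents
        for (FileStatus st : fs.listStatus(new Path("/kb12/hd/cha01"))) {
            System.out.println(st.getPath() + "  " + st.getLen());
        }
        fs.close();
    }
}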