sudo hostnamectl set-hostname Master001
su -l root



vi /etc/sysconfig/network-scripts/ifcfg-ens33
ONBOOT=yes
IPADDR=192.168.241.101
NETMASK=255.255.255.0
PREFIX=24
GATEWAY=192.168.241.2
BOOTPROTO=static
Configure name resolution for the new hostname:

vi /etc/resolv.conf

vi /etc/hosts
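For reference, minimal entries for the two files, using the static IP configured above (the nameserver value is an assumption, e.g. the VMware NAT gateway):
/etc/resolv.conf:
nameserver 192.168.241.2
/etc/hosts (append):
192.168.241.101 Master001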

nmcli connection reload
nmcli connection up ens33
nmcli d connect ens33



adduser hadoop
passwd hadoop

usermod -a -G hadoop hadoop

vi /etc/sudoers
hadoop ALL=(ALL) ALL



tar -zxf jdk-8u221-linux-x64.tar.gz -C /home/hadoop/module/
tar -zxf hadoop-3.3.1.tar.gz -C /home/hadoop/module/
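If the target directory does not exist yet, create it before running the tar commands above (the /home/hadoop/module layout is assumed throughout these notes):
mkdir -p /home/hadoop/module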

vi /etc/profile
#JAVA
export JAVA_HOME=/home/hadoop/module/jdk1.8.0_221
export PATH=$PATH:$JAVA_HOME/bin
#HADOOP
export HADOOP_HOME=/home/hadoop/module/hadoop-3.3.1
export PATH=$PATH:$HADOOP_HOME/bin
export PATH=$PATH:$HADOOP_HOME/sbin
export HADOOP_MAPRED_HOME=$HADOOP_HOME
export HADOOP_COMMON_HOME=$HADOOP_HOME
export HADOOP_HDFS_HOME=$HADOOP_HOME
export YARN_HOME=$HADOOP_HOME
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
export HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib/native"
export JAVA_LIBRARY_PATH=$HADOOP_HOME/lib/native:$JAVA_LIBRARY_PATH
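After saving, reload the profile and check that both tools are on the PATH:
source /etc/profile
java -version
hadoop version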


Edit $HADOOP_HOME/etc/hadoop/core-site.xml and add the following inside the <configuration> element:
<property>
<name>hadoop.tmp.dir</name>
<value>/home/hadoop/module/hadoop-3.3.1/tmp</value>
</property>
<property>
<name>fs.defaultFS</name>
<value>hdfs://localhost:9000</value>
</property>

Edit $HADOOP_HOME/etc/hadoop/hdfs-site.xml and add the following inside the <configuration> element:
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>/home/hadoop/module/hadoop-3.3.1/tmp/dfs/name</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>/home/hadoop/module/hadoop-3.3.1/tmp/dfs/data</value>
</property>
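Note: depending on how the daemons are launched, Hadoop 3 may also need JAVA_HOME set explicitly in $HADOOP_HOME/etc/hadoop/hadoop-env.sh (the path below matches the JDK installed above):
export JAVA_HOME=/home/hadoop/module/jdk1.8.0_221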

If you need to run the format more than once, delete the data directory first; otherwise the DataNode process will not start.
hdfs namenode -format


After a successful format, a new current folder appears under the name directory.
ssh-keygen -t rsa -P ''

Copy the key to Master001:
ssh-copy-id Master001
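To confirm that passwordless login works:
ssh Master001
exit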


The Hadoop installation and configuration is complete.

Hive installation
Download the installation package: https://hive.apache.org/downloads.html
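Extract it into the same module directory (the archive name apache-hive-3.1.2-bin.tar.gz and the rename are assumptions, chosen to match the HIVE_HOME path below):
tar -zxf apache-hive-3.1.2-bin.tar.gz -C /home/hadoop/module/
mv /home/hadoop/module/apache-hive-3.1.2-bin /home/hadoop/module/hive-3.1.2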

export HIVE_HOME=/home/hadoop/module/hive-3.1.2
export PATH=$PATH:$HIVE_HOME/bin






Check whether the Linux system already ships with a database:
rpm -qa | grep mysql
Install the MySQL database:
yum install -y mysql-server mysql mysql-devel

Enable MySQL to start at boot:
systemctl enable mysqld.service
Start the MySQL service and check its status:
service mysqld start
service mysqld status

Initialize MySQL
Set a password for the administrator (root) user
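One common way to do this (an assumption; the exact procedure differs between MySQL and MariaDB packages) is the interactive hardening script:
mysql_secure_installation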

Log in to the MySQL database:
mysql -u root -p

Create the database that will store the Hive metadata.
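For example (the database name hive is an assumption; any name works as long as it matches the hive-site.xml configuration used later):
CREATE DATABASE hive;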

Download mysql-connector-java-8.0.26.jar and upload it to the lib directory under the Hive installation directory:
cp mysql-connector-java-8.0.26.jar /home/hadoop/module/hive-3.1.2/lib/
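Before running schematool, Hive needs to be told how to reach the MySQL metastore, normally via hive-site.xml in the conf directory of the Hive installation. A minimal sketch, assuming the hive database created above and the MySQL root account (substitute your own password):
<configuration>
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mysql://Master001:3306/hive?useSSL=false</value>
</property>
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>com.mysql.cj.jdbc.Driver</value>
</property>
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>root</value>
</property>
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>your-password</value>
</property>
</configuration>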

schematool -dbType mysql -initSchema





yum install gcc



yum -y install gcc automake autoconf libtool make


Enter the redis/src directory inside the extracted source tree.
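A typical build-and-install sequence (the version 6.2.6 in the tarball name is an assumption; substitute the version actually downloaded):
tar -zxf redis-6.2.6.tar.gz -C /home/hadoop/module/
cd /home/hadoop/module/redis-6.2.6/src
make
sudo make install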


The compiled binaries are installed into /usr/local/bin.

redis-server





redis-cli
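With redis-server running (in another terminal or in the background), a quick connectivity check:
redis-cli ping
It should reply PONG.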

The Spark version used is 3.1.2, which corresponds to Scala 2.12.x.
https://www.scala-lang.org/download/2.12.15.html


tar -zxf scala-2.12.15.tgz -C /home/hadoop/module/

export SCALA_HOME=/home/hadoop/module/scala-2.12.15
export PATH=$PATH:$SCALA_HOME/bin

scala
Use :quit to exit.
https://archive.apache.org/dist/spark/spark-3.1.2/


tar -zxf spark-3.1.2-bin-without-hadoop.tgz -C /home/hadoop/module/
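The archive extracts to a directory named spark-3.1.2-bin-without-hadoop; renaming it keeps the paths consistent with the SPARK_HOME used below:
mv /home/hadoop/module/spark-3.1.2-bin-without-hadoop /home/hadoop/module/spark-3.1.2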

export SPARK_HOME=/home/hadoop/module/spark-3.1.2
export PATH=$PATH:$SPARK_HOME/bin:$SPARK_HOME/sbin

cd /home/hadoop/module/spark-3.1.2/conf
cp spark-env.sh.template spark-env.sh
cp workers.template workers

Add the following to spark-env.sh:

export JAVA_HOME=/home/hadoop/module/jdk1.8.0_221
export HADOOP_HOME=/home/hadoop/module/hadoop-3.3.1
export HADOOP_CONF_DIR=/home/hadoop/module/hadoop-3.3.1/etc/hadoop
export SPARK_MASTER_IP=192.168.241.101
export SPARK_DIST_CLASSPATH=$(/home/hadoop/module/hadoop-3.3.1/bin/hadoop classpath)
In the workers file, list the worker hosts; for this single-node setup, Master001 (or localhost) is enough.

start-dfs.sh
start-yarn.sh
start-master.sh
./sbin/start-slave.sh spark://Master001:7077
After starting the master, open the web UI at http://192.168.241.101:8080/
Then start the workers; starting a worker requires the master URL, which is spark://Master001:7077
Check the running processes:
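jps shows the daemons started above; roughly (the exact set depends on which start scripts have been run):
jps
Expected processes: NameNode, DataNode, SecondaryNameNode, ResourceManager, NodeManager, Master, Worker (plus Jps itself).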

spark-shell
Run the following command to start the Spark shell connected to the YARN cluster manager:
./bin/spark-shell --master yarn
## 8. Spark programming exercise
In spark-shell, read the local file /home/hadoop/test.txt and count the number of lines in it.
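A minimal sketch for a locally launched spark-shell (the file:// prefix forces a local filesystem read, since fs.defaultFS points at HDFS):
val lines = sc.textFile("file:///home/hadoop/test.txt")
lines.count()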
## 9. Install sbt or Maven
Programs written in Scala need to be compiled and packaged with sbt or Maven.
https://www.scala-sbt.org/download.html
Extract sbt and copy sbt-launch.jar from its bin directory into /home/hadoop/module/sbt/
Create a script named ./sbt in that directory with the following content:
#!/bin/bash
SBT_OPTS="-Xms512M -Xmx1536M -Xss1M -XX:+CMSClassUnloadingEnabled -XX:MaxPermSize=256M"
java $SBT_OPTS -jar `dirname $0`/sbt-launch.jar "$@"
Add execute permission to the ./sbt script:
chmod u+x ./sbt
Run the following command to check that sbt works:
./sbt sbtVersion

https://dlcdn.apache.org/maven/maven-3/3.6.3/binaries/

Maven installation
unzip apache-maven-3.8.3-bin.zip -d /home/hadoop/module/
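As with the other tools, Maven's bin directory can be added to /etc/profile and then verified (the directory name apache-maven-3.8.3 is what the zip above extracts to):
export MAVEN_HOME=/home/hadoop/module/apache-maven-3.8.3
export PATH=$PATH:$MAVEN_HOME/bin
mvn -v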


Problems encountered:
1. Master001: ERROR: Unable to write in /home/hadoop/module/hadoop-3.3.1/logs. Aborting.

2. Warning: Permanently added 'localhost' (ECDSA) to the list of known hosts

3. Starting Hadoop as the root user fails with: Attempting to operate on hdfs namenode as root
4. Running yum install -y mysql-server mysql mysql-devel fails with:
Failed to download metadata for repo 'appstream': Cannot prepare internal mirrorlist: No URLs in mirrorlist

5. Underlying cause: java.sql.SQLException : null, message from server: "Host 'Master001' is not allowed to connect to this MySQL server"

6. hadoop is not in the sudoers file. This incident will be reported.
Solutions:
1. Insufficient permissions; grant write access to the logs directory:
sudo chmod 777 /home/hadoop/module/hadoop-3.3.1/logs/
2. Set up passwordless SSH login (ssh-copy-id, as described above).

3. Add the following settings to the environment variables:
vi /etc/profile
export HDFS_NAMENODE_USER=root
export HDFS_DATANODE_USER=root
export HDFS_SECONDARYNAMENODE_USER=root
export YARN_RESOURCEMANAGER_USER=root
export YARN_NODEMANAGER_USER=root
Apply the environment variables:
source /etc/profile
4. Update the repo sources in /etc/yum.repos.d, replacing mirror.centos.org with vault.centos.org. Run the following two lines to make the change:
sudo sed -i -e "s|mirrorlist=|#mirrorlist=|g" /etc/yum.repos.d/CentOS-*
sudo sed -i -e "s|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g" /etc/yum.repos.d/CentOS-*
5. Log in to MySQL and update the root user's allowed host:

use mysql;
update user set host ='%' where user ='root';
flush privileges;
Restart MySQL:
service mysqld stop;
service mysqld start;
6. Log in as root, then run the following command:
visudo
In the opened file, find the following line:
root ALL=(ALL) ALL
and directly below it, add your own username, e.g. hadoop:
hadoop ALL=(ALL) ALL
If you stop here, save, and exit, the sudo command can now be used, but a password will still be required. To run sudo without being prompted for a password, also find the following line:
#%wheel ALL=(ALL) NOPASSWD: ALL
Remove the # so that it takes effect.

Then run the following command to add the user "hadoop" to the "wheel" group:
gpasswd -a hadoop wheel
