# Environment Setup
Deploying a Hadoop cluster in a Vagrant environment on Ubuntu 20.04.
## 1. Install Vagrant and VirtualBox

```bash
sudo apt-get install virtualbox vagrant -y
```
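A quick sanity check that both tools landed (a minimal sketch; version numbers will vary):

```bash
vagrant --version      # e.g. Vagrant 2.x
VBoxManage --version   # e.g. 6.x
```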
## 2. Configure the Vagrantfile
- First check whether the VirtualBox host-only network configuration file exists; if it does, the subnet listed there takes precedence:

```bash
cat /etc/vbox/networks.conf
* 192.168.10.0/24
```
This shows the host-only subnet is 192.168.10.0/24 (host address 192.168.10.1); it can also be set through the VirtualBox network manager UI.
- Then configure the `Vagrantfile`:

```ruby
Vagrant.configure("2") do |config|
  config.vm.box = "local/centos7"
  config.ssh.insert_key = false

  (102..104).each do |i|
    config.vm.define "hadoop#{i}" do |node|
      node.vm.hostname = "hadoop#{i}"
      node.vm.provider "virtualbox" do |vb|
        vb.memory = 16384
        vb.cpus = 4
      end

      node.vm.network "forwarded_port", guest: 22, host: 2200 + i, id: "ssh"
      node.vm.network "private_network", ip: "192.168.10.#{i}"

      node.vm.provision "shell", inline: <<-SHELL
        # configure DNS and the YUM mirror first
        echo "====== Configuring DNS and YUM mirror ======"
        echo "nameserver 223.5.5.5" | sudo tee /etc/resolv.conf
        echo "nameserver 8.8.8.8" | sudo tee -a /etc/resolv.conf
        sudo mv /etc/yum.repos.d/CentOS-Base.repo /etc/yum.repos.d/CentOS-Base.repo.backup 2>/dev/null || true
        sudo curl -o /etc/yum.repos.d/CentOS-Base.repo http://mirrors.aliyun.com/repo/Centos-7.repo
        sudo sed -i -e '/mirrors.cloud.aliyuncs.com/d' -e '/mirrors.aliyuncs.com/d' /etc/yum.repos.d/CentOS-Base.repo
        sudo yum clean all
        sudo yum makecache

        # install base tools
        echo "====== Installing base tools ======"
        sudo yum install -y nmap-ncat openssh-server openssh-clients vim net-tools

        # network services
        sudo systemctl restart network
        sudo systemctl start sshd
        sudo systemctl enable sshd
        sudo systemctl stop firewalld
        sudo systemctl disable firewalld

        # SSH key setup
        mkdir -p /home/vagrant/.ssh
        chmod 700 /home/vagrant/.ssh
        yes | ssh-keygen -t rsa -N "" -f /home/vagrant/.ssh/id_rsa >/dev/null
        cat /home/vagrant/.ssh/id_rsa.pub >> /home/vagrant/.ssh/authorized_keys
        chmod 600 /home/vagrant/.ssh/authorized_keys
        echo -e "Host *\n StrictHostKeyChecking no" > /home/vagrant/.ssh/config
        chown -R vagrant:vagrant /home/vagrant/.ssh
      SHELL
    end
  end

  config.vm.synced_folder ".", "/vagrant", disabled: true

  config.vm.provision "shell", run: "always", inline: <<-SHELL
    HOST_IPS=$(hostname -I)
    echo "Host IP addresses: $HOST_IPS"
    ping -c 3 mirrors.aliyun.com || { echo "Network check failed!"; exit 1; }
    ping -c 3 "192.168.10.1" || { echo "Gateway check failed!"; exit 1; }
  SHELL
end
```
- Provision and start the virtual machines (see the sketch below):
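A minimal sketch of bringing the cluster up, assuming the `Vagrantfile` above sits in the current directory:

```bash
# create and provision hadoop102..hadoop104 as defined in the Vagrantfile
vagrant up
# confirm all three machines are running
vagrant status
```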
- Use terminus to set up a jump-host chain to the VMs:
```text
current_pc -> root -> hadoop102, hadoop103, hadoop104
```
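For reference, the same machines can be reached without terminus via the SSH ports the Vagrantfile forwards (host port = 2200 + node number); the `vagrant` user is an assumption based on the default box login:

```bash
ssh -p 2302 vagrant@127.0.0.1   # hadoop102
ssh -p 2303 vagrant@127.0.0.1   # hadoop103
ssh -p 2304 vagrant@127.0.0.1   # hadoop104
```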
- Configure the Hadoop cluster on the three VMs.
# Hadoop Cluster Deployment
Configure the initial environment on hadoop102~hadoop104 at the same time:

```bash
sudo su
# set the root password (here: 123)
sudo passwd root

# switch SELinux to permissive mode
sed -i 's/SELINUX=enforcing/SELINUX=permissive/g' /etc/selinux/config

# enable key and password authentication, allow root login, disable DNS lookups
sed -i \
  -e 's/^#\?PubkeyAuthentication.*/PubkeyAuthentication yes/' \
  -e 's/^#\?PasswordAuthentication.*/PasswordAuthentication yes/' \
  -e 's/^GSSAPIAuthentication yes/GSSAPIAuthentication no/' \
  -e 's/^#\?PermitRootLogin.*/PermitRootLogin yes/' \
  -e 's/^#\?UseDNS.*/UseDNS no/' \
  /etc/ssh/sshd_config

systemctl restart sshd

# static name resolution for the cluster nodes
cat >> /etc/hosts <<EOF
192.168.10.102 hadoop102
192.168.10.103 hadoop103
192.168.10.104 hadoop104
EOF
```
- Set up passwordless SSH login:

```bash
ssh-keygen -t rsa
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
chmod 600 ~/.ssh/authorized_keys
```
On each VM in turn, copy its public key to the other two VMs:

```bash
# hadoop102
ssh-copy-id hadoop103
ssh-copy-id hadoop104

# hadoop103
ssh-copy-id hadoop102
ssh-copy-id hadoop104

# hadoop104
ssh-copy-id hadoop102
ssh-copy-id hadoop103
```
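A quick check that passwordless login works, run from any node (a minimal sketch):

```bash
for host in hadoop102 hadoop103 hadoop104; do
  ssh "$host" hostname   # should print the remote hostname with no password prompt
done
```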
A shared web drive is deployed here as an intermediary for copying files; otherwise SSH permission issues on the target directories would have to be sorted out:

```bash
docker run -d --name filebrowser --restart always \
  -v ./filebrowser/srv:/srv \
  -v ./filebrowser/config/config.json:/etc/config.json \
  -v ./filebrowser/db/database.db:/etc/database.db \
  -p 18080:80 \
  filebrowser/filebrowser
```
- Configure the Hadoop cluster. On hadoop102:

Set up a directory for custom scripts and add it to the PATH:

```bash
mkdir /root/scripts

cat >> ~/.bashrc <<EOF
export PATH=\$PATH:/root/scripts
EOF
```
Create the distribution script:

```bash
cat >> /root/scripts/xsync << 'EOF'
#!/bin/bash

# abort when called with no arguments
pcount=$#
if ((pcount==0)); then echo no args; exit; fi

# file name of the argument
p1=$1
fname=$(basename "$p1")
echo fname=$fname

# absolute path of its parent directory
pdir=$(cd -P "$(dirname "$p1")"; pwd)
echo pdir=$pdir

user=$(whoami)

# push the file to hadoop103 and hadoop104 at the same path
for ((host=103; host<105; host++)); do
  echo ------------------- hadoop$host --------------
  rsync -rvl "$pdir/$fname" "$user@hadoop$host:$pdir"
done
EOF
```
```bash
chmod +x /root/scripts/xsync
```
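Typical usage, just to illustrate the call (any file or directory path works):

```bash
xsync /root/scripts   # pushes /root/scripts to hadoop103 and hadoop104
```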
Install the JDK and Hadoop:

```bash
# jdk1.8
tar -zxvf jdk-8u212-linux-x64.tar.gz -C /opt/module/
mv /opt/module/jdk1.8.0_212 /opt/module/jdk1.8
```

```bash
# hadoop3.1.3
tar -zxvf hadoop-3.1.3.tar.gz -C /opt/module/
```
```bash
# write environment variables
cat >> ~/.bashrc <<EOF
export JAVA_HOME=/opt/module/jdk1.8
export PATH=\$JAVA_HOME/bin:\$PATH

export HADOOP_HOME=/opt/module/hadoop-3.1.3
export PATH=\$HADOOP_HOME/bin:\$HADOOP_HOME/sbin:\$PATH

export HDFS_NAMENODE_USER=root
export HDFS_DATANODE_USER=root
export HDFS_SECONDARYNAMENODE_USER=root
export YARN_RESOURCEMANAGER_USER=root
export YARN_NODEMANAGER_USER=root
EOF
```
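A quick sanity check after reloading the shell (a minimal sketch):

```bash
source ~/.bashrc
java -version    # should report 1.8.0_212
hadoop version   # should report Hadoop 3.1.3
```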
Distribute to the other nodes:

```bash
xsync /opt/module/
xsync ~/.bashrc
```
On hadoop102~hadoop104, the daemons are laid out as follows:

|      | hadoop102          | hadoop103                    | hadoop104                   |
|------|--------------------|------------------------------|-----------------------------|
| HDFS | NameNode, DataNode | DataNode                     | SecondaryNameNode, DataNode |
| YARN | NodeManager        | ResourceManager, NodeManager | NodeManager                 |
Configure the Hadoop files:

```bash
cd $HADOOP_HOME/etc/hadoop
```
Edit `core-site.xml`:

```xml
<configuration>
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://hadoop102:8020</value>
    </property>
    <property>
        <name>hadoop.tmp.dir</name>
        <value>/opt/module/hadoop-3.1.3/data</value>
    </property>
    <property>
        <name>hadoop.http.staticuser.user</name>
        <value>root</value>
    </property>
</configuration>
```
Edit `hdfs-site.xml`:

```xml
<configuration>
    <property>
        <name>dfs.namenode.http-address</name>
        <value>hadoop102:9870</value>
    </property>
    <property>
        <name>dfs.namenode.secondary.http-address</name>
        <value>hadoop104:9868</value>
    </property>
</configuration>
```
Edit `yarn-site.xml`:

```xml
<configuration>
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>
    <property>
        <name>yarn.resourcemanager.hostname</name>
        <value>hadoop103</value>
    </property>
    <property>
        <name>yarn.nodemanager.env-whitelist</name>
        <value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_MAPRED_HOME</value>
    </property>
</configuration>
```
Edit `mapred-site.xml`:

```xml
<configuration>
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>
    <property>
        <name>mapreduce.jobhistory.address</name>
        <value>hadoop102:10020</value>
    </property>
    <property>
        <name>mapreduce.jobhistory.webapp.address</name>
        <value>hadoop102:19888</value>
    </property>
</configuration>
```
Configure `workers`:

```bash
cat > /opt/module/hadoop-3.1.3/etc/hadoop/workers <<EOF
hadoop102
hadoop103
hadoop104
EOF
```
Configure `hadoop-env.sh`:

```bash
cat >> /opt/module/hadoop-3.1.3/etc/hadoop/hadoop-env.sh <<EOF
export JAVA_HOME=/opt/module/jdk1.8
export HADOOP_CONF_DIR=/opt/module/hadoop-3.1.3/etc/hadoop
EOF
```
Distribute the configuration files:

```bash
xsync /opt/module/hadoop-3.1.3/etc
```
For the first startup, format the NameNode:

```bash
hdfs namenode -format
# expected output contains: /dfs/name has been successfully formatted.
```
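If the cluster ever needs to be re-formatted, stale metadata will cause a clusterID mismatch on the DataNodes; a sketch of the cleanup (paths follow the `hadoop.tmp.dir` set above and Hadoop's default log directory):

```bash
# run on every node before re-formatting
rm -rf /opt/module/hadoop-3.1.3/data /opt/module/hadoop-3.1.3/logs
```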
Test:

```bash
# on hadoop102
./sbin/start-dfs.sh

# on hadoop103
./sbin/start-yarn.sh

# check the web UIs
# http://hadoop102:9870
# http://hadoop103:8088
```
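To exercise HDFS and YARN end to end, the example job bundled with the Hadoop 3.1.3 distribution can be run (a minimal sketch):

```bash
# estimate pi with 2 map tasks and 10 samples each; watch it at http://hadoop103:8088
hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-examples-3.1.3.jar pi 2 10
```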
Create a cluster process-monitoring script:

```bash
cat > /root/scripts/jpsall <<EOF
#!/bin/bash
for host in hadoop102 hadoop103 hadoop104
do
    echo =============== \$host ===============
    ssh \$host jps
done
EOF
```
```bash
chmod +x /root/scripts/jpsall
```
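Usage sketch; per the role table above, expect NameNode on hadoop102, ResourceManager on hadoop103, SecondaryNameNode on hadoop104, plus a DataNode and NodeManager on each node:

```bash
source ~/.bashrc   # pick up /root/scripts on the PATH
jpsall             # lists the Java daemons on every node
```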
Create a cluster start/stop script:

```bash
cat << 'EOF' > /root/scripts/myhadoop.sh
#!/bin/bash

if [ $# -lt 1 ]
then
    echo "Please provide an argument: start or stop"
    exit 1
fi

case $1 in
"start")
    echo " =================== Starting Hadoop cluster ==================="

    echo " --------------- Starting HDFS ---------------"
    ssh hadoop102 "/opt/module/hadoop-3.1.3/sbin/start-dfs.sh"
    echo " --------------- Starting YARN ---------------"
    ssh hadoop103 "/opt/module/hadoop-3.1.3/sbin/start-yarn.sh"
    echo " --------------- Starting HistoryServer ---------------"
    ssh hadoop102 "/opt/module/hadoop-3.1.3/bin/mapred --daemon start historyserver"
;;
"stop")
    echo " =================== Stopping Hadoop cluster ==================="

    echo " --------------- Stopping HistoryServer ---------------"
    ssh hadoop102 "/opt/module/hadoop-3.1.3/bin/mapred --daemon stop historyserver"
    echo " --------------- Stopping YARN ---------------"
    ssh hadoop103 "/opt/module/hadoop-3.1.3/sbin/stop-yarn.sh"
    echo " --------------- Stopping HDFS ---------------"
    ssh hadoop102 "/opt/module/hadoop-3.1.3/sbin/stop-dfs.sh"
;;
*)
    echo "Invalid argument! Usage: myhadoop.sh [start|stop]"
;;
esac
EOF
```
```bash
chmod +x /root/scripts/myhadoop.sh
```
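Usage sketch:

```bash
myhadoop.sh start   # HDFS (hadoop102), YARN (hadoop103), JobHistoryServer (hadoop102)
jpsall              # verify the daemons came up
myhadoop.sh stop
```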
Configure time synchronization:

```bash
yum -y install ntp
systemctl start ntpd
systemctl is-enabled ntpd
```
```bash
vim /etc/ntp.conf

# uncomment this line so hosts on the cluster subnet can query this server
restrict 192.168.10.0 mask 255.255.255.0 nomodify notrap

# comment out the lines below: the cluster sits on a LAN and should not
# use time sources on the Internet
# server 0.centos.pool.ntp.org iburst
# server 1.centos.pool.ntp.org iburst
# server 2.centos.pool.ntp.org iburst
# server 3.centos.pool.ntp.org iburst

# append at the end: if this node loses its network connection, it can still
# serve its local clock as the time source for the other nodes in the cluster
server 127.127.1.0
fudge 127.127.1.0 stratum 10
```
```bash
vim /etc/sysconfig/ntpd

# add: sync the hardware clock along with the system clock
SYNC_HWCLOCK=yes
```
```bash
xsync /etc/ntp.conf
xsync /etc/sysconfig/ntpd
```
```bash
# run on every machine
systemctl restart ntpd
systemctl enable ntpd
```
```bash
# on every machine except hadoop102: sync with the time server once a minute
crontab -e

*/1 * * * * /usr/sbin/ntpdate hadoop102
```
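A common manual check of the sync (a sketch; the date value is arbitrary): set a wrong time on one of the client nodes, wait for the next cron run, and confirm the clock snaps back:

```bash
# on hadoop103
date -s "2021-09-11 11:11:11"
sleep 70
date   # should be back in sync with hadoop102
```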