搭建一套原生Hadoop、Spark、Flink集群1-环境准备原创
# 搭建一套原生Hadoop、Spark、Flink集群
本文记录如何搭建原生集群。
# 一、 组件版本
组件 | 版本 | 备注 |
---|---|---|
Hadoop | 3.3.6 | |
Hive | 4.0.0 | |
Spark | 3.5.1 | |
Flink | 1.18.1 | |
Hudi | 0.15.0 | |
下载地址(基于Hudi 0.15.0支持的组件倒推):
Hadoop:https://dlcdn.apache.org/hadoop/common/hadoop-3.3.6/
Hive:https://dlcdn.apache.org/hive/hive-4.0.0/
Spark:https://dlcdn.apache.org/spark/spark-3.5.1/
Flink:https://dlcdn.apache.org/flink/flink-1.18.1/
Hudi(注意Hudi要支持Hadoop 3,需要自己编译,需要编译好的jar包可以联系我):
https://repo1.maven.org/maven2/org/apache/hudi/hudi-flink1.18-bundle/0.15.0/hudi-flink1.18-bundle-0.15.0.jar
https://repo1.maven.org/maven2/org/apache/hudi/hudi-spark3.5-bundle_2.12/0.15.0/hudi-spark3.5-bundle_2.12-0.15.0.jar
# 二、集群规划
Master-bd181 | Master-bd182 | core-bd183 | core-bd184 | core-bd185 |
---|---|---|---|---|
nn | dn/nn,nn是高可用才配置 | dn | dn | dn |
rm | nm/rm,rm是高可用才配置 | nm | nm | nm |
zk | zk | zk | ||
hive | hive | hive | hive | hive |
kafka | kafka | kafka | ||
spark | spark | spark | spark | spark |
datax | datax | datax | datax | datax |
Ds-master | Ds-master | Ds-worker | Ds-worker | Ds-worker |
maxwell | ||||
superset | ||||
mysql | ||||
flume | flume | |||
flink | flink | |||
clickhouse | ||||
redis | ||||
hbase |
# 三、Linux系统准备(Centos虚拟机)
系统安装过程略,主要介绍系统的基本配置
# 1.网络设置
# 本机名字
vim /etc/hostname
bd181
# 配置IP地址:网卡配置文件名(这里是ifcfg-enp0s3)会因网卡不同而不一样
vim /etc/sysconfig/network-scripts/ifcfg-enp0s3
# 配置网络
----------------------------------------------
TYPE="Ethernet"
PROXY_METHOD="none"
BROWSER_ONLY="no"
#BOOTPROTO="dhcp"
BOOTPROTO="static"
DEFROUTE="yes"
IPV4_FAILURE_FATAL="no"
IPV6INIT="yes"
IPV6_AUTOCONF="yes"
IPV6_DEFROUTE="yes"
IPV6_FAILURE_FATAL="no"
IPV6_ADDR_GEN_MODE="stable-privacy"
NAME="enp0s3"
UUID="dc9a38d9-c27f-4108-91df-396007211b40"
DEVICE="enp0s3"
ONBOOT="yes"
#IP地址,按实际配置
IPADDR=192.168.0.181
#网关,按实际配置
GATEWAY=192.168.0.1
#域名解析器
DNS1=8.8.8.8
----------------------------------------------
# 配置hosts
vim /etc/hosts
----------------------------------------------------------------------------------
127.0.0.1 localhost localhost.localdomain localhost4 localhost4.localdomain4
::1 localhost localhost.localdomain localhost6 localhost6.localdomain6
192.168.0.181 bd181
192.168.0.182 bd182
192.168.0.183 bd183
192.168.0.184 bd184
192.168.0.185 bd185
----------------------------------------------------------------------------------
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
# 2.配置阿里源
# 1. 备份当前的YUM仓库配置
sudo mv /etc/yum.repos.d/CentOS-Base.repo /etc/yum.repos.d/CentOS-Base.repo.backup
# 2. 下载阿里云的YUM仓库配置文件
curl -o /etc/yum.repos.d/CentOS-Base.repo http://mirrors.aliyun.com/repo/Centos-7.repo
# 3. 清除缓存并生成新的缓存
sudo yum clean all
sudo yum makecache
1
2
3
4
5
6
7
8
9
2
3
4
5
6
7
8
9
# 2.配置root用户ssh免密登录
以bd181为例,其他虚拟机一样操作
# 生成公钥和私钥
ssh-keygen -t rsa #敲3个回车
# 公钥拷贝到要免密登录的目标机器上
ssh-copy-id bd181
ssh-copy-id bd182
ssh-copy-id bd183
ssh-copy-id bd184
ssh-copy-id bd185
# 试验,能登录即正常
ssh bd181
ssh bd182
ssh bd183
ssh bd184
ssh bd185
1
2
3
4
5
6
7
8
9
10
11
12
13
14
2
3
4
5
6
7
8
9
10
11
12
13
14
# 3.编辑同步执行命令脚本
vim /bin/xcall
--------------------------------
#!/bin/bash
# xcall: run one command line on every cluster node, one node at a time.
# All script arguments are joined into a single string and handed to ssh
# as the remote command.
for node in bd185 bd184 bd183 bd182 bd181; do
  # Banner so each node's output can be told apart.
  printf '%s\n' "--------- ${node} ----------"
  ssh "${node}" "$*"
done
---------------------------------
# 修改权限(755即可:所有人可执行,但只有root可修改,避免脚本被其他用户篡改)
chmod 755 /bin/xcall
# 试验
xcall jps
1
2
3
4
5
6
7
8
9
10
11
12
13
14
2
3
4
5
6
7
8
9
10
11
12
13
14
# 4.集群文件同步脚本
# 安装rsync
xcall yum install rsync -y
# 编辑同步脚本,内容如下;保存后执行 chmod +x /bin/xsync 赋予执行权限
vim /bin/xsync
------------------------------------------
#!/bin/bash
# xsync: copy the given files/directories to the same absolute path on every
# cluster node using rsync over ssh.
#
# Usage:   xsync <path>...
# Outputs: a banner per host plus rsync's verbose listing on stdout;
#          diagnostics on stderr.
# Exit:    1 when no path is given.

# 1. Require at least one path argument.
if [ "$#" -lt 1 ]; then
  echo "Not Enough Arguments!" >&2
  exit 1
fi

# 2. Visit every machine in the cluster.
for host in bd181 bd182 bd183 bd184 bd185; do
  echo "==================== $host ===================="

  # 3. Send each requested path in turn. "$@" is quoted so that a local path
  #    containing spaces or glob characters stays a single argument.
  for file in "$@"; do
    # 4. Skip paths that do not exist locally.
    if [ ! -e "$file" ]; then
      echo "$file does not exist!" >&2
      continue
    fi

    # 5. Resolve the physical (symlink-free) parent directory. '&&' keeps a
    #    failed cd from silently yielding the current directory instead.
    if ! pdir=$(cd -P -- "$(dirname -- "$file")" && pwd); then
      echo "cannot resolve parent directory of $file" >&2
      continue
    fi

    # 6. Name of the file or directory itself.
    fname=$(basename -- "$file")

    # Make sure the parent exists remotely before copying; %q escapes the
    # path for the remote shell.
    ssh "$host" "mkdir -p -- $(printf '%q' "$pdir")"
    rsync -av -- "$pdir/$fname" "$host:$pdir"
  done
done
-----------------------------------------------
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
# 5. 基本设置
注意:如果同时开5个虚拟机,且都是桥接模式,有可能下面yum安装软件会报404错误,此时逐个服务器安装即可。
# 安装epel-release
yum install -y epel-release
# 工具包集合,包含ifconfig等命令
yum install -y net-tools
# vim
yum install -y vim
# 关闭防火墙,关闭防火墙开机自启
xcall systemctl stop firewalld
xcall systemctl disable firewalld.service
# 清空防火墙规则
xcall iptables -F
# 关闭透明大页,这里先在bd181追加好文件
echo 'echo never > /sys/kernel/mm/transparent_hugepage/defrag' >> /etc/rc.d/rc.local
echo 'echo never > /sys/kernel/mm/transparent_hugepage/enabled' >> /etc/rc.d/rc.local
chmod +x /etc/rc.d/rc.local
xsync /etc/rc.d/rc.local
# 如果在后面的检测中,还是会报警告则可以再次执行下面两句,先在bd181追加好文件
echo never > /sys/kernel/mm/transparent_hugepage/defrag
echo never > /sys/kernel/mm/transparent_hugepage/enabled
# 修改Linux swappiness参数,先在bd181追加好文件
echo 'vm.swappiness=10'>> /etc/sysctl.conf
xsync /etc/sysctl.conf
# 关闭selinux
vim /etc/selinux/config
SELINUX=disabled
xsync /etc/selinux/config
# 配置时间同步,每个节点都需要安装
# 安装
xcall yum -y install chrony
# 配置主服务bd181
vim /etc/chrony.conf
--------------------------------------
#server 0.centos.pool.ntp.org iburst
#server 1.centos.pool.ntp.org iburst
#server 2.centos.pool.ntp.org iburst
#server 3.centos.pool.ntp.org iburst
server 127.127.1.0 #本地
--------------------------------------
# 其他节点跟随主服务器182~185,把server0~3注释掉,其他不改
--------------------------------------
#server 0.centos.pool.ntp.org iburst
#server 1.centos.pool.ntp.org iburst
#server 2.centos.pool.ntp.org iburst
#server 3.centos.pool.ntp.org iburst
server bd181 iburst
--------------------------------------
# 开机启动
xcall systemctl enable chronyd
xcall systemctl start chronyd
xcall chronyc tracking
# 硬件时钟设置为UTC
xcall timedatectl set-local-rtc 0
# 设置本地时区,显示本地时间
xcall timedatectl set-timezone Asia/Shanghai
# 手动加载RTC设置
xcall hwclock --systohc
# 设置ntp为true
xcall timedatectl set-ntp true
# 每台节点修改最大进程数和最大文件句柄数
echo '* soft nproc 65535' >> /etc/security/limits.conf
echo '* hard nproc 65535' >> /etc/security/limits.conf
echo '* soft nofile 65535' >> /etc/security/limits.conf
echo '* hard nofile 65535' >> /etc/security/limits.conf
xsync /etc/security/limits.conf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# 6. 安装JDK
# 卸载现有JDK(所有节点)
# 注意:整条管道必须用引号括起来,否则 | 后面的 grep、xargs 只会在本机执行,而不是在各节点上执行
xcall "sudo rpm -qa | grep -i java | xargs -r -n1 sudo rpm -e --nodeps"
# 将jdk包放到虚拟机目录
tar -zxvf jdk-8u212-linux-x64.tar.gz -C /opt/apache
# 配置JDK环境变量
vim /etc/profile.d/my_env.sh
#JAVA_HOME
export JAVA_HOME=/opt/apache/jdk1.8.0_212
export PATH=$PATH:$JAVA_HOME/bin
# 服务器同步
xsync /opt/apache/jdk1.8.0_212
xsync /etc/profile.d/my_env.sh
xcall source /etc/profile.d/my_env.sh
# 测试,确保所有服务器都安装好JDK
xcall java -version
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
2
3
4
5
6
7
8
9
10
11
12
13
14
15
# 7. 创建用户
# 创建eddie用户,并修改eddie用户的密码
xcall useradd eddie
xcall passwd eddie
1
2
3
2
3
配置用户具有root权限,方便后期加sudo执行root权限的命令
vim /etc/sudoers
# 在%wheel这行下面添加一行,如下所示
## Allow root to run any commands anywhere
root ALL=(ALL) ALL
## Allows people in group wheel to run all commands
%wheel ALL=(ALL) ALL
eddie ALL=(ALL) NOPASSWD:ALL
1
2
3
4
5
6
7
2
3
4
5
6
7
用eddie用户登录,且配置服务器间免密登录
# 创建密钥
xcall ssh-keygen -t rsa
# 拷贝公钥,得登录各服务器,用xcall不行,
ssh-copy-id bd181
ssh-copy-id bd182
ssh-copy-id bd183
ssh-copy-id bd184
ssh-copy-id bd185
# 可以修改/etc/ssh/ssh_config文件StrictHostKeyChecking no解决,这里没有使用这种方法,当服务器很多时可使用此方法
# 可以使用sshpass -p 密码 ssh eddie@bd181 "ssh-copy-id bd181" 配合StrictHostKeyChecking no解决输入密码及yes操作
su root
xcall yum install -y sshpass
xcall sshpass -p 密码 ssh eddie@bd181 "ssh-copy-id bd181"
xcall sshpass -p 密码 ssh eddie@bd181 "ssh-copy-id bd182"
xcall sshpass -p 密码 ssh eddie@bd181 "ssh-copy-id bd183"
xcall sshpass -p 密码 ssh eddie@bd181 "ssh-copy-id bd184"
xcall sshpass -p 密码 ssh eddie@bd181 "ssh-copy-id bd185"
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
# 8.创建软件目录
xcall mkdir /opt/apache
xcall mkdir /opt/softwares
xcall mkdir /opt/module
xcall mkdir /opt/apache_data
# 改变属主
xcall chown -R eddie:eddie /opt/module
xcall chown -R eddie:eddie /opt/softwares
xcall chown -R eddie:eddie /opt/apache_data
1
2
3
4
5
6
7
8
2
3
4
5
6
7
8
使用ftp工具一股脑把所有软件包放到/opt/softwares目录下,软件包可以联系我获取。
- 03
- 求股票波峰波谷 原创04-08