OPENHPC搭建集群

许亦
2023-12-01

目前搭建HPC的开源工具主要有Rocks Cluster, xCAT, OpenHPC。其中OpenHPC是Linux基金会开源的超算项目,致力于为高性能计算(HPC)环境构建一个开源框架,适应学术研究的需求,降低成本。

本次采用虚拟机实验,没有Infiniband,故先不介绍Infiniband的配置

主节点系统(CentOS-7.7)配置

  1. 配置参数说明
环境变量变量名称说明模版参数
${sms_name}主机名server
${sms_ip}主节点内网地址192.168.130.100
$sms_eth_internal主节点内网网卡名称ens33
${eth_provision}主节点与计算节点通信网卡名称ens33
${internal_netmask}内网子网掩码255.255.255.0
${ntp_server}本地时间同步服务器192.168.130.100
${c_ip[0]}, ${c_ip[1]} ,…计算节点IP地址192.168.130.[1-5]
${c_name[0]}, ${c_name[1]} ,…计算节点名称node[1-5]
${sms_ipoib}主节点Infiniband网络地址192.168.120.100
${ipoib_netmask}主节点Infiniband网络子网掩码255.255.255.0
${c_ipoib[0]}, ${c_ipoib[1]} ,…计算节点Infiniband网络IP地址192.168.120.[1-5]
$compute_regex计算节点名称正则表达式node[1-5]
${compute_prefix}计算节点名称前缀node
${kargs}Linux内核启动参数net.ifnames=1
  1. 添加hosts文件信任
	[root@server ~]#echo "192.168.130.1  node1" >> /etc/hosts
	[root@server ~]#echo "192.168.130.2  node2" >> /etc/hosts
	[root@server ~]#echo "192.168.130.3  node3" >> /etc/hosts
	[root@server ~]#echo "192.168.130.4  node4" >> /etc/hosts
	[root@server ~]#echo "192.168.130.5  node5" >> /etc/hosts
  1. 关闭防火墙
	[root@server ~]#systemctl stop firewalld.service
	[root@server ~]#systemctl disable firewalld.service
  1. 关闭SELinux
	[root@server ~]#vi /etc/selinux/config
	SELINUX=disabled		!!修改为disabled,重启后生效
  1. 配置NTP服务
	[root@server ~]#echo "server 192.168.130.100" >> /etc/ntp.conf
	[root@server ~]#systemctl enable ntpd.service
  1. 配置内网网卡
	[root@server ~]# vi /etc/sysconfig/network-scripts/ifcfg-ens33
	TYPE=Ethernet
	PROXY_METHOD=none
	BROWSER_ONLY=no
	BOOTPROTO=static
	DEFROUTE=yes
	IPV4_FAILURE_FATAL=no
	IPV6INIT=yes
	IPV6_AUTOCONF=yes
	IPV6_DEFROUTE=yes
	IPV6_FAILURE_FATAL=no
	IPV6_ADDR_GEN_MODE=stable-privacy
	NAME=ens33
	UUID=******************************
	DEVICE=ens33
	ONBOOT=yes
	IPADDR=192.168.130.100
	PREFIX=24
	IPV6_PRIVACY=no

  1. ohpc-base,warewulf,pbs

安装open-HPC yum源 ohpc-release;

	[root@server ~]#yum install http://build.openhpc.community/OpenHPC:/1.3/CentOS_7/x86_64/ohpc-release-1.3-1.el7.x86_64.rpm
	[root@server ~]#yum -y install ohpc-base
	[root@server ~]#yum -y install ohpc-warewulf
	[root@server ~]#yum -y install pbspro-server-ohpc

管理节点warewulf配置

将/etc/warewulf/provision.conf文件中的 network device = eth1 改成内网网卡ens33

	[root@server ~]# vi /etc/warewulf/provision.conf
		# What is the default network device that the master will use to
		# communicate with the nodes?
		network device = ens33
		# Which DHCP server implementation should be used?
		dhcp server = isc
		
		# What is the TFTP root directory that should be used to store the
		# network boot images? By default Warewulf will try and find the
		# proper directory. Just add this if it can't locate it.
		#tftpdir = /var/lib/tftpboot
		
		# Automatically generate and manage a dynamnic_host virtual file
		# object in the datastore? This is useful for provisioning this
		# out to nodes so they always have a current /etc/hosts file.
		generate dynamic_hosts = yes
		
		# Should we manage and overwrite the local hostfile file on this
		# system? This will cause all node entries to be added
		# automatically to /etc/hosts.
		update hostfile = yes
		
		# If no cluster/domain is set on a node, should we add 'localdomain'
		# as the default domain
		use localdomain = yes
		
		# The default kernel arguments to pass to the nodes boot kernel
		default kargs = "net.ifnames=0 biosdevname=0 quiet"

编辑/etc/xinetd.d/tftp文件, 将disable = yes改为 disable = no

	[root@server ~]# vi /etc/xinetd.d/tftp
	# default: off
	# description: The tftp server serves files using the trivial file transfer \
	#       protocol.  The tftp protocol is often used to boot diskless \
	#       workstations, download configuration files to network-aware printers, \
	#       and to start the installation process for some operating systems.
	service tftp
	{
	        socket_type             = dgram
	        protocol                = udp
	        wait                    = yes
	        user                    = root
	        server                  = /usr/sbin/in.tftpd
	        server_args             = -s /var/lib/tftpboot
	        disable                 = no
	        per_source              = 11
	        cps                     = 100 2
	        flags                   = IPv4
	}

启动xinetd,mariadb,http服务

	[root@server ~]# systemctl restart xinetd.service
	[root@server ~]# systemctl enable xinetd.service
	[root@server ~]# systemctl restart mariadb.service
	[root@server ~]# systemctl enable mariadb.service
	[root@server ~]# systemctl enable httpd.service
	[root@server ~]# systemctl restart httpd.service
	[root@server ~]# systemctl restart dhcpd.service

重启管理节点!

配置计算节点

  1. 配置DHCP服务
	[root@server ~]# cp /etc/warewulf/dhcpd-template.conf /etc/dhcp/dhcpd.conf
	[root@server ~]# vi /etc/dhcp/dhcpd.conf
	# DHCPD Configuration written by Warewulf. Do not edit this file, rather
	# edit the template: /etc/warewulf/dhcpd-template.conf
	
	allow booting;
	allow bootp;
	ddns-update-style interim;
	authoritative;
	
	option space ipxe;
	
	# Tell iPXE to not wait for ProxyDHCP requests to speed up boot.
	option ipxe.no-pxedhcp code 176 = unsigned integer 8;
	option ipxe.no-pxedhcp 1;
	
	option architecture-type   code 93  = unsigned integer 16;
	
	if exists user-class and option user-class = "iPXE" {
	    filename "http://192.168.130.100/WW/ipxe/cfg/${mac}";
	} else {
	    if option architecture-type = 00:0B {
	        filename "/warewulf/ipxe/bin-arm64-efi/snp.efi";
	    } elsif option architecture-type = 00:0A {
	        filename "/warewulf/ipxe/bin-arm32-efi/placeholder.efi";
	    } elsif option architecture-type = 00:09 {
	        filename "/warewulf/ipxe/bin-x86_64-efi/snp.efi";
	    } elsif option architecture-type = 00:07 {
	        filename "/warewulf/ipxe/bin-x86_64-efi/snp.efi";
	    } elsif option architecture-type = 00:06 {
	        filename "/warewulf/ipxe/bin-i386-efi/snp.efi";
	    } elsif option architecture-type = 00:00 {
	        filename "/warewulf/ipxe/bin-i386-pcbios/undionly.kpxe";
	    }
	}
	
	subnet 192.168.130.0 netmask 255.255.255.0 {
	   not authoritative;
	   # option interface-mtu 9000;
	   # option subnet-mask 255.255.255.0;
	}
	
	# Node entries will follow below

  1. 创建BOS镜像
	[root@server ~]#export CHROOT=/opt/ohpc/admin/images/centos7.7
	[root@server ~]#wwmkchroot centos-7 $CHROOT
	[root@server ~]#yum -y --installroot=$CHROOT install ohpc-base-compute
  1. 配置NFS服务

    初始化warewulf database和ssh_keys

	[root@server ~]#wwinit database
	[root@server ~]#wwinit ssh_keys
	[root@server ~]#echo "192.168.130.100:/home /home nfs nfsvers=3,nodev,nosuid 0 0" >> $CHROOT/etc/fstab
	[root@server ~]#echo "192.168.130.100:/opt/ohpc/pub /opt/ohpc/pub nfs nfsvers=3,nodev 0 0" >> $CHROOT/etc/fstab
	[root@server ~]#echo "/home 192.168.130.0/24(rw,no_subtree_check,fsid=10,no_root_squash)" >> /etc/exports
	[root@server ~]#echo "/opt/ohpc/pub 192.168.130.0/24(ro,no_subtree_check,fsid=11)" >> /etc/exports
	[root@server ~]#exportfs -a
	[root@server ~]# showmount -e
	Export list for server:
	/opt/ohpc/pub 192.168.130.0/24
	/home         192.168.130.0/24
	[root@server ~]#systemctl restart nfs-server.service
	[root@server ~]#systemctl enable nfs-server.service
	[root@server ~]#chroot $CHROOT systemctl enable ntpd.service
	[root@server ~]# echo "server 192.168.130.100" >> $CHROOT/etc/ntp.conf
  1. 配置PBSPro
	[root@server ~]# yum -y --installroot=$CHROOT install pbspro-execution-ohpc
	[root@server ~]# vi $CHROOT/etc/pbs.conf
	PBS_EXEC=/opt/pbs
	PBS_SERVER=server		!!!修改为管理节点的名称server
	PBS_START_SERVER=0
	PBS_START_SCHED=0
	PBS_START_COMM=0
	PBS_START_MOM=1
	PBS_HOME=/var/spool/pbs
	PBS_CORE_LIMIT=unlimited
	PBS_SCP=/bin/scp
	[root@server ~]#echo "PBS_LEAF_NAME=server" >> /etc/pbs.conf
	PBS_EXEC=/opt/pbs
	PBS_SERVER=server
	PBS_START_SERVER=1
	PBS_START_SCHED=1
	PBS_START_COMM=1
	PBS_START_MOM=0
	PBS_HOME=/var/spool/pbs
	PBS_CORE_LIMIT=unlimited
	PBS_SCP=/bin/scp
	PBS_LEAF_NAME=server			!!增加这一行
	[root@server ~]#chroot $CHROOT opt/pbs/libexec/pbs_habitat
	[root@server ~]#echo "\$usecp *:/home /home" >> $CHROOT/var/spool/pbs/mom_priv/config
	[root@server ~]#vi $CHROOT/var/spool/pbs/mom_priv/config
	$clienthost server
	$usecp *:/home /home
	[root@server ~]# chroot $CHROOT systemctl enable pbs
	#NTP网络服务
	[root@server ~]# yum -y --installroot=$CHROOT install ntp
	#内核驱动
	[root@server ~]# yum -y --installroot=$CHROOT install kernel
	# module加载环境
	[root@server ~]# yum -y --installroot=$CHROOT install lmod-ohpc

  1. 取消镜像中计算节点内存资源限制
	[root@server ~]#vi $CHROOT/etc/security/limits.conf
	#<domain>      <type>  <item>         <value>
	#
	
	#*               soft    core            0
	#*               hard    rss             10000
	#@student        hard    nproc           20
	#@faculty        soft    nproc           20
	#@faculty        hard    nproc           50
	#ftp             hard    nproc           0
	#@student        -       maxlogins       4
	
	* soft memlock unlimited		!!修改
	* hard memlock unlimited		!!修改
	# End of file
  1. 配置ganglia监控
	[root@server ~]# yum -y install ohpc-ganglia
	[root@server ~]# yum -y --installroot=$CHROOT install ganglia-gmond-ohpc
	[root@server ~]# cp /opt/ohpc/pub/examples/ganglia/gmond.conf /etc/ganglia/gmond.conf
	[root@server ~]# vi /etc/ganglia/gmond.conf
	/* The host section describes attributes of the host, like the location */
	host {
	  location = "unspecified"
	}
	
	/* Feel free to specify as many udp_send_channels as you like.  Gmond
	   used to only support having a single channel */
	udp_send_channel {
	  #bind_hostname = yes # Highly recommended, soon to be default.
	                       # This option tells gmond to use a source address
	                       # that resolves to the machine's hostname.  Without
	                       # this, the metrics may appear to come from any
	                       # interface and the DNS names associated with
	                       # those IPs will be used to create the RRDs.
	
	  host = server			!!!修改成当前主机名
	  port = 8649
	  ttl = 1
	}
	[root@server ~]# cp /etc/ganglia/gmond.conf $CHROOT/etc/ganglia/gmond.conf
	[root@server ~]# echo "gridname MyCluster" >> /etc/ganglia/gmetad.conf
	# 设置ganglia服务开机启动
	[root@server ~]# systemctl enable gmond.service
	[root@server ~]# systemctl enable gmetad.service
	[root@server ~]# systemctl start gmond.service
	[root@server ~]# systemctl start gmetad.service
	[root@server ~]# chroot $CHROOT systemctl enable gmond.service
	#重启HTTP服务
	[root@server ~]# systemctl try-restart httpd.service
在浏览器中输入http://192.168.130.100/ganglia就可以查看ganglia监控界面。
  1. 安装节点健康检查资源管理包NHC
	# 管理节点和计算节点分别安装NHC
	[root@server ~]# yum -y install nhc-ohpc
	[root@server ~]# yum -y --installroot=$CHROOT install nhc-ohpc
  1. 同步用户信息
	[root@server ~]# wwsh file import /etc/passwd
	[root@server ~]# wwsh file import /etc/group
	[root@server ~]# wwsh file import /etc/shadow

创建计算节点启动镜像

	[root@server ~]# wwbootstrap `uname -r`

组装Virtual Node File System (VNFS) 镜像

	[root@server ~]# wwvnfs --chroot $CHROOT

添加节点

  1. 设置主节点与计算节点默认网络通讯网卡
	# Set provisioning interface as the default networking device
	[root@server ~]# echo "GATEWAYDEV=ens33" > /tmp/network.$$
	[root@server ~]# wwsh -y file import /tmp/network.$$ --name network
	[root@server ~]# wwsh -y file set network --path /etc/sysconfig/network --mode=0644 --uid=0
  1. 设置计算节点网卡名称
	[root@server ~]# export kargs="${kargs} net.ifnames=1,biosdevname=1"
	[root@server ~]# wwsh provision set --postnetdown=1 "node[1-5]"
  1. 设置计算节点安装镜像,启动项与用户信息
	[root@server ~]# wwsh -y provision set "node[1-5]" --vnfs=centos7.7 --bootstrap=`uname -r` --files=dynamic_hosts,passwd,group,shadow,network
  1. 添加节点(注意:前面的 wwsh provision set 命令要求节点已存在于warewulf数据库中,实际操作时应先执行本步骤)
	[root@server ~]# wwsh -y node new node1 --netdev=ens33 --ipaddr=192.168.130.1 --hwaddr=00:00:00:00:00:01 -D ens33
	[root@server ~]# wwsh -y node new node2 --netdev=ens33 --ipaddr=192.168.130.2 --hwaddr=00:00:00:00:00:02 -D ens33
	[root@server ~]# wwsh -y node new node3 --netdev=ens33 --ipaddr=192.168.130.3 --hwaddr=00:00:00:00:00:03 -D ens33
	[root@server ~]# wwsh -y node new node4 --netdev=ens33 --ipaddr=192.168.130.4 --hwaddr=00:00:00:00:00:04 -D ens33
	[root@server ~]# wwsh -y node new node5 --netdev=ens33 --ipaddr=192.168.130.5 --hwaddr=00:00:00:00:00:05 -D ens33
  1. 设置计算节点安装在本地硬盘并安装grub引导
	[root@server ~]# yum -y --installroot=$CHROOT install grub2
	[root@server ~]# wwvnfs --chroot $CHROOT
	[root@server ~]# cp /etc/warewulf/filesystem/examples/gpt_example.cmds /etc/warewulf/filesystem/gpt.cmds
	[root@server ~]# wwsh provision set --filesystem=gpt "node[1-5]"
	[root@server ~]# wwsh provision set --bootloader=sda "node[1-5]"
  1. 更新DHCP和PXE
	[root@server ~]# wwsh dhcp update
	[root@server ~]# wwsh pxe update

这是一个简单的脚本,可以自动完成计算节点添加与配置

#!/bin/bash
#
# Register a single compute node with Warewulf and configure how it is
# provisioned (VNFS image, bootstrap kernel, imported files, disk layout and
# bootloader), then refresh the DHCP and PXE configuration.
#
# Usage: <this-script> [NODE [IPADDR [MAC]]]
#   NODE    compute node name         (default: node3)
#   IPADDR  compute node IP address   (default: 192.168.130.3)
#   MAC     compute node MAC address  (default is a placeholder; replace it)
#
# The provisioning interface name can be overridden through the NETDEV
# environment variable (default: enp2s0).

# Stop at the first failing step so DHCP/PXE are not regenerated for a node
# that was only partially configured.
set -euo pipefail

# Explanatory notes must be real comments: trailing text after an assignment
# is parsed by bash as a command, which leaves the variable unset.
NODE=${1:-node3}                  # compute node name
IPADDR=${2:-192.168.130.3}        # compute node IP address
MAC=${3:-xx:xx:xx:xx:xx:xx}       # compute node MAC address
NETDEV=${NETDEV:-enp2s0}          # provisioning network interface on the node

wwsh -y node new "$NODE" --netdev="$NETDEV" --ipaddr="$IPADDR" --netmask=255.255.255.0 --hwaddr="$MAC" -D "$NETDEV"
sleep 1s
# NOTE(review): kargs is only exported here; no later command in this script
# passes it to wwsh -- confirm whether it is still needed.
export kargs="enp2s0 net.ifnames=1,biosdevname=1"
wwsh -y provision set --postnetdown=1 "$NODE"
wwsh -y provision set "$NODE" --vnfs=centos7.7 --bootstrap="$(uname -r)" --files=dynamic_hosts,passwd,group,shadow,network
wwsh -y provision set --filesystem=gpt "$NODE"
wwsh -y provision set --bootloader=sda "$NODE"
sleep 1s
wwsh dhcp update
wwsh pxe update

计算节点完成安装后设置

  1. 计算节点安装成功后设置从本地硬盘启动
	[root@server ~]# wwsh provision set --bootlocal=normal "node[1-5]"
  1. 查看安装节点
	[root@server ~]# wwsh provision print
	#### node1 ####################################################################
          node1: BOOTSTRAP        = 3.10.0-1062.el7.x86_64
          node1: VNFS             = centos7.7
          node1: FILES            = dynamic_hosts,group,network,passwd,shadow
          node1: PRESHELL         = FALSE
          node1: POSTSHELL        = FALSE
          node1: CONSOLE          = UNDEF
          node1: PXELINUX         = UNDEF
          node1: SELINUX          = DISABLED
          node1: KARGS            = "net.ifnames=1,biosdevname=1"
          node1: FS               = "select /dev/sda,mklabel gpt,mkpart primary 1MiB 3MiB,mkpart primary ext4 3MiB 513MiB,mkpart primary linux-swap 513MiB 50%,mkpart primary ext4 50% 100%,name 1 grub,name 2 boot,name 3 swap,name 4 root,set 1 bios_grub on,set 2 boot on,mkfs 2 ext4 -L boot,mkfs 3 swap,mkfs 4 ext4 -L root,fstab 4 / ext4 defaults 0 0,fstab 2 /boot ext4 defaults 0 0,fstab 3 swap swap defaults 0 0"
          node1: BOOTLOADER       = sda
          node1: BOOTLOCAL        = NORMAL
	#### node2 ####################################################################
          node2: BOOTSTRAP        = 3.10.0-1062.el7.x86_64
          node2: VNFS             = centos7.7
          node2: FILES            = dynamic_hosts,group,network,passwd,shadow
          node2: PRESHELL         = FALSE
          node2: POSTSHELL        = FALSE
          node2: CONSOLE          = UNDEF
          node2: PXELINUX         = UNDEF
          node2: SELINUX          = DISABLED
          node2: KARGS            = "net.ifnames=1,biosdevname=1"
          node2: FS               = "select /dev/sda,mklabel gpt,mkpart primary 1MiB 3MiB,mkpart primary ext4 3MiB 513MiB,mkpart primary linux-swap 513MiB 50%,mkpart primary ext4 50% 100%,name 1 grub,name 2 boot,name 3 swap,name 4 root,set 1 bios_grub on,set 2 boot on,mkfs 2 ext4 -L boot,mkfs 3 swap,mkfs 4 ext4 -L root,fstab 4 / ext4 defaults 0 0,fstab 2 /boot ext4 defaults 0 0,fstab 3 swap swap defaults 0 0"
          node2: BOOTLOADER       = sda
          node2: BOOTLOCAL        = NORMAL
	#### node3 ####################################################################
          node3: BOOTSTRAP        = 3.10.0-1062.el7.x86_64
          node3: VNFS             = centos7.7
          node3: FILES            = dynamic_hosts,group,network,passwd,shadow
          node3: PRESHELL         = FALSE
          node3: POSTSHELL        = FALSE
          node3: CONSOLE          = UNDEF
          node3: PXELINUX         = UNDEF
          node3: SELINUX          = DISABLED
          node3: KARGS            = "net.ifnames=1,biosdevname=1"
          node3: FS               = "select /dev/sda,mklabel gpt,mkpart primary 1MiB 3MiB,mkpart primary ext4 3MiB 513MiB,mkpart primary linux-swap 513MiB 50%,mkpart primary ext4 50% 100%,name 1 grub,name 2 boot,name 3 swap,name 4 root,set 1 bios_grub on,set 2 boot on,mkfs 2 ext4 -L boot,mkfs 3 swap,mkfs 4 ext4 -L root,fstab 4 / ext4 defaults 0 0,fstab 2 /boot ext4 defaults 0 0,fstab 3 swap swap defaults 0 0"
          node3: BOOTLOADER       = sda
          node3: BOOTLOCAL        = NORMAL
	#### node4 ####################################################################
          node4: BOOTSTRAP        = 3.10.0-1062.el7.x86_64
          node4: VNFS             = centos7.7
          node4: FILES            = dynamic_hosts,group,network,passwd,shadow
          node4: PRESHELL         = FALSE
          node4: POSTSHELL        = FALSE
          node4: CONSOLE          = UNDEF
          node4: PXELINUX         = UNDEF
          node4: SELINUX          = DISABLED
          node4: KARGS            = "net.ifnames=1,biosdevname=1"
          node4: FS               = "select /dev/sda,mklabel gpt,mkpart primary 1MiB 3MiB,mkpart primary ext4 3MiB 513MiB,mkpart primary linux-swap 513MiB 50%,mkpart primary ext4 50% 100%,name 1 grub,name 2 boot,name 3 swap,name 4 root,set 1 bios_grub on,set 2 boot on,mkfs 2 ext4 -L boot,mkfs 3 swap,mkfs 4 ext4 -L root,fstab 4 / ext4 defaults 0 0,fstab 2 /boot ext4 defaults 0 0,fstab 3 swap swap defaults 0 0"
          node4: BOOTLOADER       = sda
          node4: BOOTLOCAL        = NORMAL
	#### node5 ####################################################################
          node5: BOOTSTRAP        = 3.10.0-1062.el7.x86_64
          node5: VNFS             = centos7.7
          node5: FILES            = dynamic_hosts,group,network,passwd,shadow
          node5: PRESHELL         = FALSE
          node5: POSTSHELL        = FALSE
          node5: CONSOLE          = UNDEF
          node5: PXELINUX         = UNDEF
          node5: SELINUX          = DISABLED
          node5: KARGS            = "net.ifnames=1,biosdevname=1"
          node5: FS               = "select /dev/sda,mklabel gpt,mkpart primary 1MiB 3MiB,mkpart primary ext4 3MiB 513MiB,mkpart primary linux-swap 513MiB 50%,mkpart primary ext4 50% 100%,name 1 grub,name 2 boot,name 3 swap,name 4 root,set 1 bios_grub on,set 2 boot on,mkfs 2 ext4 -L boot,mkfs 3 swap,mkfs 4 ext4 -L root,fstab 4 / ext4 defaults 0 0,fstab 2 /boot ext4 defaults 0 0,fstab 3 swap swap defaults 0 0"
          node5: BOOTLOADER       = sda
          node5: BOOTLOCAL        = NORMAL

	[root@server ~]# wwsh node list
	NAME                GROUPS              IPADDR              HWADDR             
================================================================================
node1               UNDEF               192.168.130.1       xx:xx:xx:xx:xx:01  
node2               UNDEF               192.168.130.2       xx:xx:xx:xx:xx:02  
node3               UNDEF               192.168.130.3       xx:xx:xx:xx:xx:03  
node4               UNDEF               192.168.130.4       xx:xx:xx:xx:xx:04  
node5               UNDEF               192.168.130.5       xx:xx:xx:xx:xx:05  
  1. 可查看/etc/hosts文件,通过warewulf添加成功的hosts
	### ALL ENTRIES BELOW THIS LINE WILL BE OVERWRITTEN BY WAREWULF ###
	#
	# See provision.conf for configuration paramaters
	
	
	# Node Entry for node: node1 (ID=1)
	192.168.130.1           node1.localdomain node1 node1-ens33.localdomain node1-ens33
	
	# Node Entry for node: node2 (ID=2)
	192.168.130.2           node2.localdomain node2 node2-ens33.localdomain node2-ens33
	
	# Node Entry for node: node3 (ID=3)
	192.168.130.3           node3.localdomain node3 node3-ens33.localdomain node3-ens33
	
	# Node Entry for node: node4 (ID=4)
	192.168.130.4           node4.localdomain node4 node4-ens33.localdomain node4-ens33
	
	# Node Entry for node: node5 (ID=5)
	192.168.130.5           node5.localdomain node5 node5-ens33.localdomain node5-ens33
	

至此,整个集群搭建完成,可以部署相关计算应用环境。

 类似资料: