优点:使用yum直接安装RPM包,标准方法,配置简单。
1. 准备好环境,3台机器,1台作为master管理节点,另外2台作为计算节点,分别为:
m1: 192.168.3.30
s1: 192.168.3.31
s2: 192.168.3.32
在每台机器上配置/etc/hosts,确保各节点之间可以通过主机名互相解析,如:
192.168.3.30 m1
192.168.3.31 s1
192.168.3.32 s2
2. 登录节点m1,s1,s2,安装epel-release:
[root@m1 yum.repos.d]# yum install epel-release
...
Installed:
epel-release.noarch 0:7-9
Complete!
3. 在各节点安装OpenHPC官方仓库:
[root@m1 yum.repos.d]# yum install https://github.com/openhpc/ohpc/releases/download/v1.3.GA/ohpc-release-1.3-1.el7.x86_64.rpm
...
Installed:
ohpc-release.x86_64 0:1.3-1.el7
Complete!
4. master节点(m1)安装PBSPro server包:
[root@m1 ~]# yum install -y pbspro-server-ohpc
5. Slave节点(s1,s2)安装PBSPro execution包:
[root@s1 ~]# yum install -y pbspro-execution-ohpc
6. 配置Slave节点(s1, s2),使其指向master节点m1:
在/etc/pbs.conf中设置:
PBS_SERVER=m1
在/var/spool/pbs/mom_priv/config中设置:
$clienthost m1
7. 启动PBSPro集群(先在m1和各Slave节点上启动pbs服务,s2上的操作与s1相同),然后在m1上注册计算节点并查看节点状态:
[root@m1 ~]# systemctl enable pbs
Created symlink from /etc/systemd/system/multi-user.target.wants/pbs.service to /usr/lib/systemd/system/pbs.service.
[root@m1 ~]# systemctl start pbs
[root@s1 ~]# systemctl enable pbs
Created symlink from /etc/systemd/system/multi-user.target.wants/pbs.service to /usr/lib/systemd/system/pbs.service.
[root@s1 ~]# systemctl start pbs
[root@m1 ~]# . /etc/profile.d/pbs.sh
[root@m1 ~]# qmgr -c 'create node s1'
[root@m1 ~]# qmgr -c 'create node s2'
[root@m1 ~]# pbsnodes -a
s1
Mom = s1
Port = 15002
pbs_version = 14.1.0
ntype = PBS
state = free
pcpus = 1
resources_available.arch = linux
resources_available.host = s1
resources_available.mem = 918488kb
resources_available.ncpus = 1
resources_available.vnode = s1
resources_assigned.accelerator_memory = 0kb
resources_assigned.mem = 0kb
resources_assigned.naccelerators = 0
resources_assigned.ncpus = 0
resources_assigned.netwins = 0
resources_assigned.vmem = 0kb
resv_enable = True
sharing = default_shared
s2
Mom = s2
Port = 15002
pbs_version = 14.1.0
ntype = PBS
state = free
pcpus = 1
resources_available.arch = linux
resources_available.host = s2
resources_available.mem = 918488kb
resources_available.ncpus = 1
resources_available.vnode = s2
resources_assigned.accelerator_memory = 0kb
resources_assigned.mem = 0kb
resources_assigned.naccelerators = 0
resources_assigned.ncpus = 0
resources_assigned.netwins = 0
resources_assigned.vmem = 0kb
resv_enable = True
sharing = default_shared
8. 提交测试作业,并查看作业运行情况:
[xxin@m1 ~]$ echo 'sleep 111' | qsub
2.m1
[xxin@m1 ~]$ qstat
Job id Name User Time Use S Queue
---------------- ---------------- ---------------- -------- - -----
2.m1 STDIN xxin 00:00:00 R workq
注:如果节点之间无法通信(例如pbsnodes显示节点状态为down),可以关闭防火墙后重试,以m1为例:
[root@m1 ~]# systemctl stop firewalld.service
[root@m1 ~]# systemctl disable firewalld.service