ICode9

精准搜索请尝试: 精确搜索
首页 > 其他分享> 文章详细

Slurm及OpenLDAP部署

2021-06-03 18:03:43  阅读:374  来源: 互联网

标签:node01 部署 slurm Slurm etc OpenLDAP so root pam


Slurm及OpenLDAP部署

概述

Slurm是一个开源、容错且高度可扩展的集群管理和作业调度系统,用于大型和小型Linux集群。

Slurm提供三种关键功能:

  • 分配对资源的排他和/或非排他访问
  • 提供一个用于在分配的节点集上启动、执行和监视作业的框架
  • 通过管理一个未完成作业队列来解决对资源的争用

一、构建拓扑结构

  • 搭建4台Linux服务器

  • 配置IP地址及主机名

    [root@localhost ~]# vi /etc/sysconfig/network-scripts/ifcfg-ens33
    # 修改网卡参数,若参数不存在则添加即可
    IPADDR=192.168.100.100
    NETMASK=255.255.255.0
    GATEWAY=192.168.100.2
    DNS1=114.114.114.114
    ONBOOT=yes
    BOOTPROTO=static
    
    [root@localhost ~]# systemctl restart network
    [root@localhost ~]# hostnamectl set-hostname slurm
    [root@localhost ~]# bash
    [root@slurm ~]#
    
    #-------------------Node01----------------
    
    [root@localhost ~]# vi /etc/sysconfig/network-scripts/ifcfg-ens33
    # 修改网卡参数,若参数不存在则添加即可
    IPADDR=192.168.100.101
    NETMASK=255.255.255.0
    GATEWAY=192.168.100.2
    DNS1=114.114.114.114
    ONBOOT=yes
    BOOTPROTO=static
    
    [root@localhost ~]# systemctl restart network
    [root@localhost ~]# hostnamectl set-hostname node01
    [root@localhost ~]# bash
    [root@node01 ~]#
    
    #-------------------Node02----------------
    
    [root@localhost ~]# vi /etc/sysconfig/network-scripts/ifcfg-ens33
    # 修改网卡参数,若参数不存在则添加即可
    IPADDR=192.168.100.102
    NETMASK=255.255.255.0
    GATEWAY=192.168.100.2
    DNS1=114.114.114.114
    ONBOOT=yes
    BOOTPROTO=static
    
    [root@localhost ~]# systemctl restart network
    [root@localhost ~]# hostnamectl set-hostname node02
    [root@localhost ~]# bash
    [root@node02 ~]#
    
    #-------------------Node03----------------
    
    [root@localhost ~]# vi /etc/sysconfig/network-scripts/ifcfg-ens33
    # 修改网卡参数,若参数不存在则添加即可
    IPADDR=192.168.100.103
    NETMASK=255.255.255.0
    GATEWAY=192.168.100.2
    DNS1=114.114.114.114
    ONBOOT=yes
    BOOTPROTO=static
    
    [root@localhost ~]# systemctl restart network
    [root@localhost ~]# hostnamectl set-hostname node03
    [root@localhost ~]# bash
    [root@node03 ~]#
    
    
  • 修改Hosts文件(4台服务器一致)

    [root@slurm ~]# vi /etc/hosts
    # 新增
    192.168.100.100 slurm
    192.168.100.101 node01
    192.168.100.102 node02
    192.168.100.103 node03
    
    # node01-03同样需要修改,参照上面配置即可
    

二、安装Slurm应用

  • 上传slurm19-EL7.5GUI-installer-1210.tar.gz文件至4台服务器的/root目录下

    注:4台机器都需安装

    # 方法不限
    E:\cmp>scp slurm19-EL7.5GUI-installer-1210.tar.gz root@192.168.100.100:/root
    E:\cmp>scp slurm19-EL7.5GUI-installer-1210.tar.gz root@192.168.100.101:/root
    E:\cmp>scp slurm19-EL7.5GUI-installer-1210.tar.gz root@192.168.100.102:/root
    E:\cmp>scp slurm19-EL7.5GUI-installer-1210.tar.gz root@192.168.100.103:/root
    
  • 解压并执行

    [root@slurm ~]# yum -y install librrd* libhwloc*
    [root@slurm ~]# tar zxvf slurm19-EL7.5GUI-installer-1210.tar.gz
    [root@slurm ~]# cd slurm19-EL7.5GUI-installer
    [root@slurm slurm19-EL7.5GUI-installer]# ./install_slurm.sh 
    [root@slurm ~]# systemctl stop firewalld.service
    [root@slurm ~]# systemctl disable firewalld.service
    

三、配置NTP时间同步

  • 修改配置文件将Slurm调度系统作为本地时间服务器

    [root@slurm etc]# yum -y install ntp
    [root@slurm etc]# vi /etc/ntp.conf 
    #server 0.centos.pool.ntp.org iburst
    #server 1.centos.pool.ntp.org iburst
    #server 2.centos.pool.ntp.org iburst
    #server 3.centos.pool.ntp.org iburst
    server 127.127.1.0
    fudge 127.127.1.0 stratum 10
    
    [root@slurm etc]# systemctl restart ntpd
    
  • 配置3台slurm计算节点的NTP

    [root@node01 etc]# vi /etc/ntp.conf
    #server 0.centos.pool.ntp.org iburst
    #server 1.centos.pool.ntp.org iburst
    #server 2.centos.pool.ntp.org iburst
    #server 3.centos.pool.ntp.org iburst
    server slurm iburst
    
    [root@node01 etc]# ntpdate -u slurm
     3 Jun 14:34:21 ntpdate[1944]: adjust time server 192.168.100.100 offset -0.034998 sec
    [root@node01 etc]# systemctl restart ntpd
    [root@node01 etc]# ntpq -p
         remote           refid      st t when poll reach   delay   offset  jitter
    ==============================================================================
    *slurm           LOCAL(0)        11 u    2   64    1    0.237  -31.197   3.549
    [root@node01 etc]# ntpstat 	# 显示如下即成功
    synchronised to NTP server (192.168.100.100) at stratum 12
       time correct to within 983 ms
       polling server every 64 s
    [root@node01 etc]# 
    

四、修改Slurm配置文件

  • 所有Slurm集群节点均使用相同的配置文件

    [root@slurm ~]# vi slurm19-EL7.5GUI-installer/slurm-config/slurm.conf
    # 将所有的ctl01修改为slurm
    # 12 34 159
    #
    # 并修改文件最后的NODES和PARTITIONS
    
    ################################################
    #                    NODES                     #
    ################################################
    NodeName=node[01-03] CPUs=1 Boards=1 SocketsPerBoard=1 CoresPerSocket=1 ThreadsPerCore=1 RealMemory=972
    #
    ################################################
    #                  PARTITIONS                  #
    ################################################
    PartitionName=computerPartiton Default=YES MinNodes=0 Nodes=node[01-03] State=UP
    #
    #  NODES的参数设置为主机名[节点数],后面的参数请使用slurmd -C命令查看
    [root@node01 slurm-config]# slurmd -C
    NodeName=node01 CPUs=1 Boards=1 SocketsPerBoard=1 CoresPerSocket=1 ThreadsPerCore=1 RealMemory=972
    
  • 将slurm.conf文件复制到各个节点包括主节点

    [root@slurm ~]# cp /root/slurm19-EL7.5GUI-installer/slurm-config/slurm.conf /etc/slurm/
    [root@slurm ~]# scp /root/slurm19-EL7.5GUI-installer/slurm-config/slurm.conf root@192.168.100.101:/etc/slurm/
    [root@slurm ~]# scp /root/slurm19-EL7.5GUI-installer/slurm-config/slurm.conf root@192.168.100.102:/etc/slurm/
    [root@slurm ~]# scp /root/slurm19-EL7.5GUI-installer/slurm-config/slurm.conf root@192.168.100.103:/etc/slurm/
    
  • 配置主控节点

    [root@slurm slurm19-EL7.5GUI-installer]# ./slurm_init_ctld.sh 
    # 自动安装数据库并初始化
    # user = root
    # passwd = 123456a?
    
    [root@slurm slurm19-EL7.5GUI-installer]# cd
    [root@slurm ~]# systemctl restart slurmdbd
    [root@slurm ~]# systemctl status slurmdbd
    ● slurmdbd.service - Slurm DBD accounting daemon
       Loaded: loaded (/usr/lib/systemd/system/slurmdbd.service; enabled; vendor preset: disabled)
       Active: active (running) since Thu 2021-06-03 17:00:20 CST; 6s ago
      Process: 13388 ExecStart=/usr/sbin/slurmdbd $SLURMDBD_OPTIONS (code=exited, status=0/SUCCESS)
       CGroup: /system.slice/slurmdbd.service
               └─13391 /usr/sbin/slurmdbd
    [root@slurm ~]# systemctl enable slurmctld.service
    [root@slurm ~]# systemctl start slurmctld.service
    
  • 配置3台计算节点

    [root@node01 slurm19-EL7.5GUI-installer]# ./cmp_slurm_init.sh 
    
    Slurm computer node installed, configuration successfully!.
    
    [root@node01 slurm19-EL7.5GUI-installer]# systemctl restart slurmd.service
    [root@node01 slurm19-EL7.5GUI-installer]# systemctl enable slurmd.service
    
    

五、运行Slurm调度

[root@slurm ~]# sinfo
PARTITION         AVAIL  TIMELIMIT  NODES  STATE NODELIST
computerPartiton*    up   infinite      3   idle node[01-03]
[root@slurm ~]# srun -n 3 hostname
node01
node02
node03

六、部署OpenLDAP

安装前准备

# 关闭selinux、关闭防火墙
[root@slurm ~]# vi /etc/sysconfig/selinux
# This file controls the state of SELinux on the system.
# SELINUX= can take one of these three values:
#     enforcing - SELinux security policy is enforced.
#     permissive - SELinux prints warnings instead of enforcing.
#     disabled - No SELinux policy is loaded.
SELINUX=disabled
# SELINUXTYPE= can take one of three values:
#     targeted - Targeted processes are protected,
#     minimum - Modification of targeted policy. Only selected processes are protected.
#     mls - Multi Level Security protection.
SELINUXTYPE=targeted

[root@slurm ~]# systemctl stop NetworkManager
[root@slurm ~]# systemctl disable NetworkManager
Removed symlink /etc/systemd/system/multi-user.target.wants/NetworkManager.service.
Removed symlink /etc/systemd/system/dbus-org.freedesktop.nm-dispatcher.service.
Removed symlink /etc/systemd/system/network-online.target.wants/NetworkManager-wait-online.service.
[root@slurm ~]# 
  • 安装OpenLDAP服务端(Slurm主控节点上安装)

    [root@slurm ~]# yum -y install openldap openldap-servers openldap-clients migrationtools
    [root@slurm ~]# sed -i -e 's/olcSuffix:.*/olcSuffix: dc=xiaowangc,dc=com/g' /etc/openldap/slapd.d/cn\=config/olcDatabase\=\{2\}hdb.ldif
    [root@slurm ~]# sed -i -e 's/olcRootDN:.*/olcRootDN: cn=admin,dc=xiaowangc,dc=com/g' /etc/openldap/slapd.d/cn\=config/olcDatabase\=\{2\}hdb.ldif
    [root@slurm ~]# echo 'olcRootPw: 123456a?' >> /etc/openldap/slapd.d/cn\=config/olcDatabase\=\{2\}hdb.ldif
    [root@slurm ~]# sed -i -e 's/dn.base="cn=.*"/dn.base="cn=admin,dc=xiaowangc,dc=com"/g' /etc/openldap/slapd.d/cn\=config/olcDatabase\=\{1\}monitor.ldif
    [root@slurm ~]# cp /usr/share/openldap-servers/DB_CONFIG.example /var/lib/ldap/DB_CONFIG
    [root@slurm ~]# chown -R ldap.ldap /var/lib/ldap
    
    # 超级管理员为cn=admin,dc=xiaowangc,dc=com  密码为:123456a?
    [root@slurm ~]# systemctl restart slapd
    [root@slurm ~]# systemctl enable slapd
    Created symlink from /etc/systemd/system/multi-user.target.wants/slapd.service to /usr/lib/systemd/system/slapd.service.
    [root@slurm ~]# cd /etc/openldap/schema/
    [root@slurm ~]# find . -name '*.ldif' -exec ldapadd -Y EXTERNAL -H ldapi:/// -D "cn=config" -f {} \;
    [root@slurm ~]# cd /usr/share/migrationtools/
    [root@slurm ~]# sed -i -e 's/"ou=Group"/"ou=Groups"/g' migrate_common.ph
    [root@slurm ~]# sed -i -e 's/$DEFAULT_MAIL_DOMAIN = .*/$DEFAULT_MAIL_DOMAIN = "hpcce.com";/g' migrate_common.ph
    [root@slurm ~]# sed -i -e 's/$DEFAULT_BASE = .*/$DEFAULT_BASE = "dc=xiaowangc,dc=com";/g' migrate_common.ph
    [root@slurm ~]# sed -i -e 's/$EXTENDED_SCHEMA = 0;/$EXTENDED_SCHEMA = 1;/g' migrate_common.ph
    [root@slurm ~]# ./migrate_base.pl > /root/base.ldif
    [root@slurm ~]# ldapadd -x -w 123456a? -D "cn=admin,dc=xiaowangc,dc=com" -f /root/base.ldif
    
  • 部署OpenLDAP客户端(3个计算节点)

    [root@node01 ~]# yum -y install nss-pam-ldapd
    [root@node01 ~]# authconfig --enableldap --enableldapauth --ldapserver="ldap://192.168.100.100:389" --ldapbasedn="dc=xiaowangc,dc=com" --update
    [root@node01 ~]# authconfig --enablemkhomedir --update
    [root@node01 ~]# authconfig --updateall
    
    
    ​```
    # 确保以下三个文件配置一致
    ​```
    
    [root@node01 ~]# vi /etc/nsswitch.conf
    # looked up first in the databases
    #
    # Example:
    #passwd:    db files nisplus nis
    #shadow:    db files nisplus nis
    #group:     db files nisplus nis
    
    passwd:     files sss ldap		# 修改这
    shadow:     files sss ldap		# 修改这
    group:      files sss ldap		# 修改这
    #initgroups: files sss
    
    #hosts:     db files nisplus nis dns
    hosts:      files dns myhostname
    
    # Example - obey only what nisplus tells us...
    #services:   nisplus [NOTFOUND=return] files
    #networks:   nisplus [NOTFOUND=return] files
    #protocols:  nisplus [NOTFOUND=return] files
    #rpc:        nisplus [NOTFOUND=return] files
    #ethers:     nisplus [NOTFOUND=return] files
    #netmasks:   nisplus [NOTFOUND=return] files
    
    bootparams: nisplus [NOTFOUND=return] files
    
    ethers:     files
    netmasks:   files
    networks:   files
    protocols:  files
    rpc:        files
    services:   files sss
    
    netgroup:   files sss ldap		# 修改这
    
    publickey:  nisplus
    
    automount:  files ldap		# 修改这
    aliases:    files nisplus
    
    [root@node01 ~]# vi /etc/pam.d/system-auth
    #%PAM-1.0
    # This file is auto-generated.
    # User changes will be destroyed the next time authconfig is run.
    auth        required      pam_env.so
    auth        required      pam_faildelay.so delay=2000000
    auth        sufficient    pam_unix.so nullok try_first_pass
    auth        requisite     pam_succeed_if.so uid >= 1000 quiet_success
    auth        sufficient    pam_ldap.so use_first_pass		# 修改这
    auth        required      pam_deny.so
    
    account     required      pam_unix.so broken_shadow
    account     sufficient    pam_localuser.so
    account     sufficient    pam_succeed_if.so uid < 1000 quiet
    account     [default=bad success=ok user_unknown=ignore] pam_ldap.so		# 修改这
    account     required      pam_permit.so
    
    password    requisite     pam_pwquality.so try_first_pass local_users_only retry=3 authtok_type=
    password    sufficient    pam_unix.so sha512 shadow nullok try_first_pass use_authtok
    password    sufficient    pam_ldap.so use_authtok		# 修改这
    password    required      pam_deny.so
    
    session     optional      pam_keyinit.so revoke
    session     required      pam_limits.so
    -session     optional      pam_systemd.so
    session     optional      pam_mkhomedir.so umask=0077		# 修改这
    session     [success=1 default=ignore] pam_succeed_if.so service in crond quiet use_uid
    session     required      pam_unix.so
    session     optional      pam_ldap.so		# 修改这
    
    [root@node01 ~]# vi /etc/pam.d/password-auth
    #%PAM-1.0
    # This file is auto-generated.
    # User changes will be destroyed the next time authconfig is run.
    auth        required      pam_env.so
    auth        required      pam_faildelay.so delay=2000000
    auth        sufficient    pam_unix.so nullok try_first_pass
    auth        requisite     pam_succeed_if.so uid >= 1000 quiet_success
    auth        sufficient    pam_ldap.so use_first_pass	# 修改这
    auth        required      pam_deny.so
    
    account     required      pam_unix.so broken_shadow
    account     sufficient    pam_localuser.so
    account     sufficient    pam_succeed_if.so uid < 1000 quiet
    account     [default=bad success=ok user_unknown=ignore] pam_ldap.so		# 修改这
    account     required      pam_permit.so
    
    password    requisite     pam_pwquality.so try_first_pass local_users_only retry=3 authtok_type=
    password    sufficient    pam_unix.so sha512 shadow nullok try_first_pass use_authtok
    password    sufficient    pam_ldap.so use_authtok		# 修改这
    
    
    password    required      pam_deny.so
    
    session     optional      pam_keyinit.so revoke
    session     required      pam_limits.so
    -session     optional      pam_systemd.so
    session     optional      pam_mkhomedir.so umask=0077		# 修改这
    session     [success=1 default=ignore] pam_succeed_if.so service in crond quiet use_uid
    session     required      pam_unix.so
    session     optional      pam_ldap.so		# 修改这
    
    
    [root@node01 ~]# systemctl restart nslcd
    [root@node01 ~]# systemctl restart sshd
    [root@node01 ~]# systemctl enable nslcd
    [root@node01 ~]#
    

七、测试OpenLDAP

使用LDAP Admin软件测试

  • 随意在一个计算节点验证

    [root@node01 ~]# id testuser01
    uid=44317(testuser01) gid=0(root) groups=0(root)
    [root@node01 ~]# cat /etc/passwd | grep testuser01
    [root@node01 ~]# 
    

标签:node01,部署,slurm,Slurm,etc,OpenLDAP,so,root,pam
来源: https://www.cnblogs.com/xiaowangc/p/14846302.html

本站声明: 1. iCode9 技术分享网(下文简称本站)提供的所有内容,仅供技术学习、探讨和分享;
2. 关于本站的所有留言、评论、转载及引用,纯属内容发起人的个人观点,与本站观点和立场无关;
3. 关于本站的所有言论和文字,纯属内容发起人的个人观点,与本站观点和立场无关;
4. 本站文章均是网友提供,不完全保证技术分享内容的完整性、准确性、时效性、风险性和版权归属;如您发现该文章侵犯了您的权益,可联系我们第一时间进行删除;
5. 本站为非盈利性的个人网站,所有内容不会用来进行牟利,也不会利用任何形式的广告来间接获益,纯粹是为了广大技术爱好者提供技术内容和技术思想的分享性交流网站。

专注分享技术,共同学习,共同进步。侵权联系[81616952@qq.com]

Copyright (C)ICode9.com, All Rights Reserved.

ICode9版权所有