易陆发现互联网技术论坛

 找回密码
 开始注册
查看: 2561|回复: 0
收起左侧

自动化kolla-ansible部署openstack+GPU透传方法

[复制链接]
发表于 2021-6-25 11:36:52 | 显示全部楼层 |阅读模式

马上注册,结交更多好友,享用更多功能,让你轻松玩转社区。

您需要 登录 才可以下载或查看,没有账号?开始注册

x
1. CentOS7.x-8.x系列为虚拟机配置GPU直通
复制代码
1. 编辑文件vim  /etc/modules, 添加以下内容:
pci_stub
vfio
vfio_iommu_type1
vfio_pci
kvm
kvm_intel

2. 在KVM主机上启用IOMMU
#对于Intel芯片:
GRUB_CMDLINE_LINUX_DEFAULT="intel_iommu=on"
#对于AMD芯片:
GRUB_CMDLINE_LINUX_DEFAULT="iommu=pt iommu=1"

vim /etc/default/grub

GRUB_TIMEOUT=5
GRUB_DISTRIBUTOR="$(sed 's, release .*$,,g' /etc/system-release)"
GRUB_DEFAULT=saved
GRUB_DISABLE_SUBMENU=true
GRUB_TERMINAL_OUTPUT="console"
GRUB_CMDLINE_LINUX="crashkernel=auto rhgb quiet intel_iommu=on"
GRUB_DISABLE_RECOVERY="true"

   3.  重新生成grub
   EFI
   grub2-mkconfig -o /boot/efi/EFI/centos/grub.cfg
   非EFI
   grub2-mkconfig -o /boot/grub2/grub.cfg
4.  将下列内容加入到blacklist中以避免被宿主机占用,编辑文件
vim  /etc/modprobe.d/blacklist.conf
blacklist snd_hda_intel
blacklist amd76x_edac
blacklist vga16fb
blacklist nouveau
blacklist rivafb
blacklist nvidiafb
blacklist rivatv
blacklist nvidia

5.  查找显卡的Product ID 以及 Vendor ID:
yum install pciutils -y
lspci -nn | grep NVIDIA
如下:
[root@stein-a ~]# lspci -nn | grep NVIDIA
03:00.0 VGA compatible controller [0300]: NVIDIA Corporation GP104GL [Quadro P4000] [10de:1bb1] (rev a1)
03:00.1 Audio device [0403]: NVIDIA Corporation GP104 High Definition Audio Controller [10de:10f0] (rev a1)

6.  编辑
vim /etc/modprobe.d/vfio.conf
# create new: for [ids=***], specify [vendor-ID:device-ID]
options vfio-pci ids=10de:1bb1,10de:10f0

7.  写入到系统启动项
echo 'vfio-pci' > /etc/modules-load.d/vfio-pci.conf

8.  重新生成initramfs
mv /boot/initramfs-$(uname -r).img /boot/initramfs-$(uname -r).img.bak
dracut -v /boot/initramfs-$(uname -r).img $(uname -r)

9.  重启系统
reboot

10. 验证
lspci -nnk -d 10de:1bb1
dmesg | grep -i vfio
[root@stein-a ~]# lspci -nnk -d 10de:1bb1
03:00.0 VGA compatible controller [0300]: NVIDIA Corporation GP104GL [Quadro P4000] [10de:1bb1] (rev a1)
        Subsystem: NVIDIA Corporation Device [10de:11a3]
        Kernel driver in use: vfio-pci
        Kernel modules: nouveau
[root@stein-a ~]# dmesg | grep -i vfio
[    2.503115] VFIO - User Level meta-driver version: 0.3
[    2.515645] vfio_pci: add [10de:1bb1[ffff:ffff]] class 0x000000/00000000
[    2.515752] vfio_pci: add [10de:10f0[ffff:ffff]] class 0x000000/00000000
[root@stein-a ~]#
复制代码

2. Ubuntu18.04系列为虚拟机配置GPU直通
复制代码
1. 编辑文件vim  /etc/modules, 添加以下内容:
pci_stub
vfio
vfio_iommu_type1
vfio_pci
kvm
kvm_intel

2. 在KVM主机上启用IOMMU
#对于Intel芯片:
GRUB_CMDLINE_LINUX_DEFAULT="quiet splash intel_iommu=on"
#对于AMD芯片:
GRUB_CMDLINE_LINUX_DEFAULT="iommu=pt iommu=1"

vim /etc/default/grub

GRUB_DEFAULT=0
GRUB_TIMEOUT_STYLE=hidden
GRUB_TIMEOUT=0
GRUB_DISTRIBUTOR=`lsb_release -i -s 2> /dev/null || echo Debian`
GRUB_CMDLINE_LINUX_DEFAULT="quiet splash intel_iommu=on"
GRUB_CMDLINE_LINUX=""

   3.  重新生成grub
   EFI
   grub2-mkconfig -o /boot/efi/EFI/centos/grub.cfg
   非EFI
   grub2-mkconfig -o /boot/grub2/grub.cfg
   (注:以上为CentOS下的命令和路径;Ubuntu 18.04 上请执行 update-grub 重新生成grub配置)
4.  将下列内容加入到blacklist中以避免被宿主机占用,编辑文件
vim  /etc/modprobe.d/blacklist.conf
blacklist snd_hda_intel
blacklist amd76x_edac
blacklist vga16fb
blacklist nouveau
blacklist rivafb
blacklist nvidiafb
blacklist rivatv
blacklist nvidia

5.  查找显卡的Product ID 以及 Vendor ID:
apt install pciutils -y
lspci -nn | grep NVIDIA
如下:
[root@stein-a ~]# lspci -nn | grep NVIDIA
03:00.0 VGA compatible controller [0300]: NVIDIA Corporation GP104GL [Quadro P4000] [10de:1bb1] (rev a1)
03:00.1 Audio device [0403]: NVIDIA Corporation GP104 High Definition Audio Controller [10de:10f0] (rev a1)

6.  编辑
vim /etc/modprobe.d/vfio.conf
# create new: for [ids=***], specify [vendor-ID:device-ID]
options vfio-pci ids=10de:1bb1,10de:10f0

7.  写入到系统启动项
echo 'vfio-pci' > /etc/modules-load.d/vfio-pci.conf

8.  重新生成initramfs
dracut -v /boot/initramfs-$(uname -r).img $(uname -r)
(注:Ubuntu 默认使用 initramfs-tools 而非 dracut;若未安装 dracut,可执行 update-initramfs -u)

9.  重启系统
reboot

10. 验证
lspci -nnk -d 10de:1bb1
dmesg | grep -i vfio
root@kvm:~# lspci -nnk -d 10de:1bb1
dmesg | grep -i vfio
03:00.0 VGA compatible controller [0300]: NVIDIA Corporation GP104GL [Quadro P4000] [10de:1bb1] (rev a1)
        Subsystem: NVIDIA Corporation GP104GL [Quadro P4000] [10de:11a3]
        Kernel driver in use: vfio-pci
        Kernel modules: nvidiafb, nouveau
root@kvm:~# dmesg | grep -i vfio
[    3.838714] VFIO - User Level meta-driver version: 0.3
[    3.846238] vfio-pci 0000:03:00.0: vgaarb: changed VGA decodes: olddecodes=io+mem,decodes=io+mem:owns=none
[    3.866370] vfio_pci: add [10de:1bb1[ffffffff:ffffffff]] class 0x000000/00000000
[    3.886375] vfio_pci: add [10de:10f0[ffffffff:ffffffff]] class 0x000000/00000000
复制代码

复制代码
#如果你单机部署的,在单机下配置。
#如果你是高可用部署的,在三台控制节点配置
1. 添加pci
vim /etc/kolla/config/nova/nova-compute.conf
[libvirt]
inject_password=true
cpu_mode=host-passthrough
virt_type = kvm
[pci]
passthrough_whitelist: { "vendor_id": "10de", "product_id": "1bb1" }
, s8 ]/ _5 H( Y) Z( H2 R( B( Z  V# G8 T$ S
2. 修改nova.conf
vim /etc/kolla/config/nova.conf
[DEFAULT]
service_down_time = 120
cpu_allocation_ratio = 4.0
disk_allocation_ratio=1.0
ram_allocation_ratio = 1.0
reserved_host_disk_mb = 4096
reserved_host_memory_mb = 4096
allow_resize_to_same_host = True
remove_unused_base_images = False
image_cache_manager_interval = 0
resume_guests_state_on_host_boot = True

[PCI]
alias: { "vendor_id":"10de", "product_id":"1bb1", "device_type":"type-PCI", "name":"quadro-p4000" }
[filter_scheduler]
enabled_filters = RetryFilter, AvailabilityZoneFilter, ComputeFilter, ComputeCapabilitiesFilter, ImagePropertiesFilter, ServerGroupAntiAffinityFilter, ServerGroupAffinityFilter, PciPassthroughFilter
available_filters = nova.scheduler.filters.all_filters

3. GPU 类型实例创建
openstack flavor create --vcpus 4 --ram 8192 --disk 30 --property "pci_passthrough:alias"="quadro-p4000:1" g1.4c.8m.p400
复制代码

3. CentOS7.x系列 安装显卡驱动
复制代码
1.  查看是否含有英伟达显卡
lspci | grep -i NVIDIA
#下面说明有1块英伟达的显卡
[root@train-all ~]#  lspci | grep -i NVIDIA
04:00.0 VGA compatible controller: NVIDIA Corporation GP104GL [Quadro P4000] (rev a1)
04:00.1 Audio device: NVIDIA Corporation GP104 High Definition Audio Controller (rev a1)
[root@train-all ~]#

2.  添加ELRepo源
rpm --import https://www.elrepo.org/RPM-GPG-KEY-elrepo.org

3.  安装ELRepo
rpm -Uvh http://www.elrepo.org/elrepo-release-7.0-2.el7.elrepo.noarch.rpm

4.  安装nvidia-detect
yum install nvidia-detect -y

5.  运行nvidia-detect
nvidia-detect -v

6.  查找驱动程序
yum search kmod-nvidia

7.  安装驱动程序
yum install kmod-nvidia.x86_64 -y

8.  查看是否已禁用Nouveau
lsmod | grep nouveau
#若没有输出 则说明禁用成功,否则执行下面的命令

9.  创建文件 /etc/modprobe.d/blacklist-nouveau.conf,其内容如下:
vi /etc/modprobe.d/blacklist-nouveau.conf
添加
blacklist nouveau
options nouveau modeset=0

10. 重新生成内核initramfs
dracut --force

11.  重启系统
reboot

12.  测试
nvidia-smi
您需要登录后才可以回帖 登录 | 开始注册

本版积分规则

关闭

站长推荐上一条 /4 下一条

北京云银创陇科技有限公司以云计算运维,代码开发

QQ|返回首页|Archiver|小黑屋|易陆发现技术论坛 点击这里给我发消息

GMT+8, 2026-4-8 15:22 , Processed in 0.045049 second(s), 22 queries .

Powered by Discuz! X3.4 Licensed

© 2012-2025 Discuz! Team.

快速回复 返回顶部 返回列表