| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203 |
- // Copyright 2019 Yunion
- //
- // Licensed under the Apache License, Version 2.0 (the "License");
- // you may not use this file except in compliance with the License.
- // You may obtain a copy of the License at
- //
- // http://www.apache.org/licenses/LICENSE-2.0
- //
- // Unless required by applicable law or agreed to in writing, software
- // distributed under the License is distributed on an "AS IS" BASIS,
- // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- // See the License for the specific language governing permissions and
- // limitations under the License.
- package container_device
- import (
- "fmt"
- "strconv"
- "strings"
- runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
- "yunion.io/x/log"
- "yunion.io/x/pkg/errors"
- hostapi "yunion.io/x/onecloud/pkg/apis/host"
- "yunion.io/x/onecloud/pkg/hostman/isolated_device"
- "yunion.io/x/onecloud/pkg/util/procutils"
- )
- func init() {
- isolated_device.RegisterContainerDeviceManager(newAscendNPUManager())
- }
- type ascendNPUManager struct{}
- func (m *ascendNPUManager) GetContainerExtraConfigures(devs []*hostapi.ContainerDevice) ([]*runtimeapi.KeyValue, []*runtimeapi.Mount) {
- npus := []string{}
- for _, dev := range devs {
- if dev.IsolatedDevice == nil {
- continue
- }
- if isolated_device.ContainerDeviceType(dev.IsolatedDevice.DeviceType) != isolated_device.ContainerDeviceTypeAscendNpu {
- continue
- }
- npus = append(npus, dev.IsolatedDevice.Path)
- }
- if len(npus) == 0 {
- return nil, nil
- }
- var (
- ASCEND_TOOLKIT_HOME = "/usr/local/Ascend/ascend-toolkit/latest"
- LD_LIBRARY_PATH = fmt.Sprintf("/usr/local/Ascend/driver/lib64:/usr/local/Ascend/driver/lib64/common:/usr/local/Ascend/driver/lib64/driver:"+
- "%s/lib64:%s/lib64/plugin/opskernel:%s/lib64/plugin/nnengine", ASCEND_TOOLKIT_HOME, ASCEND_TOOLKIT_HOME, ASCEND_TOOLKIT_HOME)
- ASCEND_AICPU_PATH = ASCEND_TOOLKIT_HOME
- ASCEND_OPP_PATH = fmt.Sprintf("%s/opp", ASCEND_TOOLKIT_HOME)
- TOOLCHAIN_HOME = fmt.Sprintf("%s/toolkit", ASCEND_TOOLKIT_HOME)
- ASCEND_HOME_PATH = ASCEND_AICPU_PATH
- )
- return []*runtimeapi.KeyValue{
- {
- Key: "ASCEND_TOOLKIT_HOME",
- Value: ASCEND_TOOLKIT_HOME,
- }, {
- Key: "LD_LIBRARY_PATH",
- Value: LD_LIBRARY_PATH,
- }, {
- Key: "ASCEND_AICPU_PATH",
- Value: ASCEND_AICPU_PATH,
- }, {
- Key: "ASCEND_OPP_PATH",
- Value: ASCEND_OPP_PATH,
- }, {
- Key: "TOOLCHAIN_HOME",
- Value: TOOLCHAIN_HOME,
- }, {
- Key: "ASCEND_HOME_PATH",
- Value: ASCEND_HOME_PATH,
- },
- }, []*runtimeapi.Mount{
- {
- ContainerPath: "/usr/local/Ascend",
- HostPath: "/usr/local/Ascend",
- Readonly: true,
- },
- {
- ContainerPath: "/usr/local/dcmi",
- HostPath: "/usr/local/dcmi",
- Readonly: true,
- },
- {
- ContainerPath: "/usr/local/bin/npu-smi",
- HostPath: "/usr/local/bin/npu-smi",
- Readonly: true,
- },
- }
- }
- func newAscendNPUManager() *ascendNPUManager {
- return &ascendNPUManager{}
- }
- func (m *ascendNPUManager) ProbeDevices() ([]isolated_device.IDevice, error) {
- return getAscendNpus()
- }
- func (m *ascendNPUManager) NewDevices(dev *isolated_device.ContainerDevice) ([]isolated_device.IDevice, error) {
- return nil, nil
- }
- func (m *ascendNPUManager) NewContainerDevices(input *hostapi.ContainerCreateInput, dev *hostapi.ContainerDevice) ([]*runtimeapi.Device, []*runtimeapi.Device, error) {
- return []*runtimeapi.Device{
- &runtimeapi.Device{
- ContainerPath: dev.IsolatedDevice.Path,
- HostPath: dev.IsolatedDevice.Path,
- Permissions: "rwm",
- },
- }, []*runtimeapi.Device{
- &runtimeapi.Device{
- ContainerPath: "/dev/davinci_manager",
- HostPath: "/dev/davinci_manager",
- Permissions: "rwm",
- },
- &runtimeapi.Device{
- ContainerPath: "/dev/devmm_svm",
- HostPath: "/dev/devmm_svm",
- Permissions: "rwm",
- },
- &runtimeapi.Device{
- ContainerPath: "/dev/hisi_hdc",
- HostPath: "/dev/hisi_hdc",
- Permissions: "rwm",
- },
- }, nil
- }
- func (m *ascendNPUManager) GetType() isolated_device.ContainerDeviceType {
- return isolated_device.ContainerDeviceTypeAscendNpu
- }
- type ascnedNPU struct {
- *BaseDevice
- }
- func getAscendNpus() ([]isolated_device.IDevice, error) {
- devs := make([]isolated_device.IDevice, 0)
- // Show all device's topology information
- out, err := procutils.NewRemoteCommandAsFarAsPossible("npu-smi", "info").Output()
- if err != nil {
- return nil, errors.Wrap(err, "npu-smi")
- }
- lines := strings.Split(string(out), "\n")
- for i := 6; i < len(lines); i += 3 {
- if !strings.HasPrefix(lines[i], "|") {
- continue
- }
- if len(lines) <= (i + 1) {
- return nil, errors.Errorf("failed parse npu-smi unknown chip line")
- }
- fields := strings.Fields(lines[i])
- if len(fields) < 3 {
- return nil, errors.Errorf("failed parse npu-smi unknown npu line")
- }
- log.Debugf("fields %v", fields)
- strNpuID := fields[1]
- npuId, err := strconv.Atoi(strNpuID)
- if err != nil {
- log.Warningf("failed parse npuid %s: %s. break", strNpuID, err)
- break
- }
- npuName := fields[2]
- devPath := fmt.Sprintf("/dev/davinci%d", npuId)
- fileds2 := strings.Fields(lines[i+1])
- if len(fileds2) < 4 {
- return nil, errors.Errorf("failed parse npu-smi unknonw chip line get busid")
- }
- log.Debugf("fileds2 %v", fileds2)
- busID := fileds2[3]
- pciOutput, err := isolated_device.GetPCIStrByAddr(busID)
- if err != nil {
- return nil, errors.Wrapf(err, "GetPCIStrByAddr %s", busID)
- }
- dev := isolated_device.NewPCIDevice2(pciOutput[0])
- npuDev := &ascnedNPU{
- BaseDevice: NewBaseDevice(dev, isolated_device.ContainerDeviceTypeAscendNpu, devPath),
- }
- npuDev.SetModelName(npuName)
- devs = append(devs, npuDev)
- }
- if len(devs) == 0 {
- return nil, nil
- }
- return devs, nil
- }
|