hostinfo.go 85 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916
  1. // Copyright 2019 Yunion
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package hostinfo
  15. import (
  16. "bytes"
  17. "context"
  18. "fmt"
  19. "math"
  20. "net"
  21. "os"
  22. "path"
  23. "reflect"
  24. "regexp"
  25. "strconv"
  26. "strings"
  27. "sync"
  28. "syscall"
  29. "time"
  30. "github.com/vishvananda/netlink"
  31. "golang.org/x/sync/errgroup"
  32. "yunion.io/x/jsonutils"
  33. "yunion.io/x/log"
  34. "yunion.io/x/pkg/errors"
  35. "yunion.io/x/pkg/util/httputils"
  36. "yunion.io/x/pkg/util/regutils"
  37. "yunion.io/x/pkg/util/version"
  38. "yunion.io/x/pkg/utils"
  39. "yunion.io/x/onecloud/pkg/apis"
  40. api "yunion.io/x/onecloud/pkg/apis/compute"
  41. hostapi "yunion.io/x/onecloud/pkg/apis/host"
  42. identityapi "yunion.io/x/onecloud/pkg/apis/identity"
  43. napi "yunion.io/x/onecloud/pkg/apis/notify"
  44. "yunion.io/x/onecloud/pkg/cloudcommon/consts"
  45. "yunion.io/x/onecloud/pkg/cloudcommon/notifyclient"
  46. "yunion.io/x/onecloud/pkg/cloudcommon/tsdb"
  47. "yunion.io/x/onecloud/pkg/cloudcommon/types"
  48. _ "yunion.io/x/onecloud/pkg/hostman/container/storage/local_raw"
  49. _ "yunion.io/x/onecloud/pkg/hostman/container/storage/rbd"
  50. "yunion.io/x/onecloud/pkg/hostman/guestfs/fsdriver"
  51. "yunion.io/x/onecloud/pkg/hostman/hostinfo/hostbridge"
  52. "yunion.io/x/onecloud/pkg/hostman/hostinfo/hostconsts"
  53. "yunion.io/x/onecloud/pkg/hostman/hostutils"
  54. "yunion.io/x/onecloud/pkg/hostman/hostutils/hardware"
  55. "yunion.io/x/onecloud/pkg/hostman/hostutils/kubelet"
  56. "yunion.io/x/onecloud/pkg/hostman/isolated_device"
  57. _ "yunion.io/x/onecloud/pkg/hostman/isolated_device/container_device"
  58. "yunion.io/x/onecloud/pkg/hostman/monitor"
  59. "yunion.io/x/onecloud/pkg/hostman/options"
  60. "yunion.io/x/onecloud/pkg/hostman/storageman"
  61. _ "yunion.io/x/onecloud/pkg/hostman/storageman/container_storage"
  62. "yunion.io/x/onecloud/pkg/hostman/system_service"
  63. "yunion.io/x/onecloud/pkg/httperrors"
  64. "yunion.io/x/onecloud/pkg/mcclient"
  65. "yunion.io/x/onecloud/pkg/mcclient/auth"
  66. modules "yunion.io/x/onecloud/pkg/mcclient/modules/compute"
  67. "yunion.io/x/onecloud/pkg/util/cgrouputils"
  68. "yunion.io/x/onecloud/pkg/util/cgrouputils/cpuset"
  69. "yunion.io/x/onecloud/pkg/util/fileutils2"
  70. "yunion.io/x/onecloud/pkg/util/k8s/tokens"
  71. "yunion.io/x/onecloud/pkg/util/logclient"
  72. "yunion.io/x/onecloud/pkg/util/netutils2"
  73. "yunion.io/x/onecloud/pkg/util/ovnutils"
  74. "yunion.io/x/onecloud/pkg/util/pod"
  75. "yunion.io/x/onecloud/pkg/util/pod/stats"
  76. "yunion.io/x/onecloud/pkg/util/procutils"
  77. "yunion.io/x/onecloud/pkg/util/qemutils"
  78. "yunion.io/x/onecloud/pkg/util/sysutils"
  79. )
// SHostInfo aggregates the state the host agent keeps about the local host:
// lifecycle flags, detected hardware/system information, network interfaces,
// identifiers of the region/zone/domain the host belongs to, and the
// container runtime handles used when the host runs containers.
type SHostInfo struct {
	// lifecycle flags
	isRegistered bool
	// IsRegistered chan struct{}
	// registerCallback func()
	stopped bool
	isLoged bool
	saved   bool

	// hardware and system information
	Cpu                 *SCPUInfo
	Mem                 *SMemory
	sysinfo             *SSysInfo
	qemuMachineInfoList []monitor.MachineInfo
	kvmMaxCpus          uint
	kubeletConfig       kubelet.KubeletConfig
	isInit              bool
	onHostDown          string

	// cpu reservation / pinning and overcommit bounds
	reservedCpusInfo   *api.HostReserveCpusInput
	guestPinnedCpus    []int
	enableNumaAllocate bool
	cpuCmtBound        float32
	memCmtBound        float32

	// IsolatedDeviceMan is created in parseConfig via isolated_device.NewManager.
	IsolatedDeviceMan isolated_device.IsolatedDeviceManager

	// MasterNic is selected in parseConfig, either from the configured listen
	// interface or from the NICs in Nics that carry an IP address.
	MasterNic *netutils2.SNetInterface
	// Nics holds one entry per configured network plus the host local bridge.
	Nics []*SNIC

	// identifiers of this host and of the zone/region/domain it belongs to
	HostId         string
	Zone           string
	ZoneId         string
	Cloudregion    string
	CloudregionId  string
	ZoneManagerUri string
	Project_domain string
	Domain_id      string
	FullName       string

	SysError map[string][]api.HostError
	// IoScheduler is the scheduler chosen for HDD block devices in prepareEnv.
	IoScheduler string

	// container related members
	cri                            pod.CRI
	containerCPUMap                *pod.HostContainerCPUMap
	containerStatsProvider         stats.ContainerStatsProvider
	containerCpufreqSimulateConfig *jsonutils.JSONDict
	containerNvidiaGpus            []isolated_device.IDevice
	hasNvidiaGpus                  *bool
	hasVastaitechGpus              *bool
	hasCphAmdGpus                  *bool

	guestManager hostutils.IGuestManager
}
  125. func (h *SHostInfo) GetContainerDeviceConfigurationFilePath() string {
  126. return options.HostOptions.ContainerDeviceConfigFile
  127. }
  128. func (h *SHostInfo) GetContainerCpufreqSimulateConfig() *jsonutils.JSONDict {
  129. return h.containerCpufreqSimulateConfig
  130. }
  131. func (h *SHostInfo) SetIGuestManager(guestManager hostutils.IGuestManager) {
  132. h.guestManager = guestManager
  133. }
  134. func (h *SHostInfo) GetIGuestManager() hostutils.IGuestManager {
  135. return h.guestManager
  136. }
  137. func (h *SHostInfo) GetIsolatedDeviceManager() isolated_device.IsolatedDeviceManager {
  138. return h.IsolatedDeviceMan
  139. }
  140. func (h *SHostInfo) GetBridgeDev(bridge string) hostbridge.IBridgeDriver {
  141. bridgeDev := options.HostOptions.NicBridgeDevName(bridge)
  142. for _, n := range h.Nics {
  143. if bridgeDev == n.Bridge {
  144. return n.BridgeDev
  145. }
  146. }
  147. if bridge == options.HostOptions.OvnIntegrationBridge || bridge == api.HostVpcBridge {
  148. drv, err := hostbridge.NewOVSBridgeDriverByName(bridge)
  149. if err != nil {
  150. log.Errorf("create ovn bridge driver: %v", err)
  151. return nil
  152. }
  153. return drv
  154. } else if bridge == api.HostTapBridge {
  155. drv, err := hostbridge.NewOVSBridgeDriverByName(options.HostOptions.TapBridgeName)
  156. if err != nil {
  157. log.Errorf("create ovn bridge driver: %v", err)
  158. return nil
  159. }
  160. return drv
  161. }
  162. return nil
  163. }
  164. func (h *SHostInfo) StartDHCPServer() {
  165. for _, nic := range h.Nics {
  166. if nic.dhcpServer != nil {
  167. nic.dhcpServer.Start(false)
  168. }
  169. if nic.dhcpServer6 != nil {
  170. nic.dhcpServer6.Start(false)
  171. }
  172. }
  173. }
  174. func (h *SHostInfo) GetHostId() string {
  175. return h.HostId
  176. }
  177. func (h *SHostInfo) GetZoneId() string {
  178. return h.ZoneId
  179. }
  180. /*func (h *SHostInfo) GetMediumType() string {
  181. if h.sysinfo != nil {
  182. return h.sysinfo.StorageType
  183. }
  184. return ""
  185. }*/
  186. func (h *SHostInfo) IsKvmSupport() bool {
  187. return sysutils.IsKvmSupport()
  188. }
  189. func (h *SHostInfo) IsNestedVirtualization() bool {
  190. return utils.IsInStringArray("hypervisor", h.Cpu.cpuFeatures)
  191. }
  192. func (h *SHostInfo) IsHugepagesEnabled() bool {
  193. return options.HostOptions.HugepagesOption == "native"
  194. }
  195. func (h *SHostInfo) HugepageSizeKb() int {
  196. return h.sysinfo.HugepageSizeKb
  197. }
  198. /* In this order init host service:
  199. * 1. prepare env, fix environment variable path
  200. * 2. detect hostinfo, fill host capability and custom host field
  201. * 3. prepare hostbridge, start openvswitch service
  202. * 4. parse host config, config ip address
  203. * 5. check is ovn support, setup ovn chassis
  204. */
  205. func (h *SHostInfo) Init(ctx context.Context) error {
  206. if err := h.prepareEnv(); err != nil {
  207. return errors.Wrap(err, "Prepare environment")
  208. }
  209. log.Infof("Start detectHostInfo")
  210. if err := h.detectHostInfo(); err != nil {
  211. return errors.Wrap(err, "detectHostInfo")
  212. }
  213. if err := hostbridge.Prepare(options.HostOptions.BridgeDriver); err != nil {
  214. return errors.Wrapf(err, "Prepare host bridge %q", options.HostOptions.BridgeDriver)
  215. }
  216. log.Infof("Start parseConfig")
  217. if err := h.parseConfig(); err != nil {
  218. return errors.Wrap(err, "parseConfig")
  219. }
  220. if ovnutils.HasOvnSupport() && !options.HostOptions.DisableLocalVpc {
  221. if err := h.setupOvnChassis(); err != nil {
  222. return errors.Wrap(err, "Setup OVN Chassis")
  223. }
  224. }
  225. if h.IsContainerHost() {
  226. if err := h.initCRI(); err != nil {
  227. return errors.Wrap(err, "init container runtime interface")
  228. }
  229. if err := h.initContainerCPUMap(h.sysinfo.Topology); err != nil {
  230. return errors.Wrap(err, "init container cpu map")
  231. }
  232. go func() {
  233. if err := h.startContainerStatsProvider(h.cri); err != nil {
  234. log.Warningf("start container stats provider error: %v", err)
  235. } else {
  236. log.Infof("container stats provider started")
  237. }
  238. }()
  239. if fileutils2.Exists(options.HostOptions.ContainerSystemCpufreqSimulateConfigFile) {
  240. if err := h.getContainerCpufreqSimulateConfig(); err != nil {
  241. return errors.Wrap(err, "getContainerCpuSimulateConfig")
  242. }
  243. }
  244. }
  245. return nil
  246. }
  247. func (h *SHostInfo) getContainerCpufreqSimulateConfig() error {
  248. content, err := fileutils2.FileGetContents(options.HostOptions.ContainerSystemCpufreqSimulateConfigFile)
  249. if err != nil {
  250. return errors.Wrapf(err, "FileGetContents %s", options.HostOptions.ContainerSystemCpufreqSimulateConfigFile)
  251. }
  252. obj, err := jsonutils.ParseYAML(content)
  253. if err != nil {
  254. return errors.Wrapf(err, "parse YAML content: %s", content)
  255. }
  256. cfg := new(hostutils.SContainerCpufreqSimulateConfig)
  257. if err := obj.Unmarshal(cfg); err != nil {
  258. return errors.Wrapf(err, "unmarshal object to SContainerCpufreqSimulateConfig")
  259. }
  260. h.containerCpufreqSimulateConfig = jsonutils.Marshal(cfg).(*jsonutils.JSONDict)
  261. return nil
  262. }
  263. func (h *SHostInfo) setupOvnChassis() error {
  264. opts := &options.HostOptions
  265. if opts.BridgeDriver != hostbridge.DRV_OPEN_VSWITCH {
  266. return nil
  267. }
  268. log.Infof("Start setting up ovn chassis")
  269. oh := NewOvnHelper(h)
  270. if err := oh.Init(); err != nil {
  271. return err
  272. }
  273. return nil
  274. }
  275. func (h *SHostInfo) generateLocalNetworkConfig() (string, error) {
  276. netIp, dev, err := netutils2.DefaultSrcIpDev()
  277. if err != nil {
  278. return "", errors.Wrap(err, "find default source address & device")
  279. }
  280. log.Infof("Find dev: %s ip: %s", dev, netIp)
  281. var bridgeName string
  282. // test if dev is bridge
  283. if err := procutils.NewCommand("ovs-vsctl", "br-exists", dev).Run(); err == nil {
  284. portStr, err := procutils.NewCommand("ovs-vsctl", "list-ports", dev).Output()
  285. if err != nil {
  286. return "", errors.Wrap(err, "list port")
  287. }
  288. ports := strings.Split(string(portStr), "\n")
  289. devs := []string{}
  290. for i := 0; i < len(ports); i++ {
  291. portName := strings.TrimSpace(ports[i])
  292. if len(portName) > 0 {
  293. lk, err := netlink.LinkByName(portName)
  294. if err != nil {
  295. log.Errorf("netlink.LinkByName %s failed %s", portName, err)
  296. continue
  297. } else {
  298. log.Infof("port %s link type %s", portName, lk.Type())
  299. if !utils.IsInStringArray(lk.Type(), []string{"veth", "tun"}) {
  300. devs = append(devs, portName)
  301. }
  302. }
  303. }
  304. }
  305. if len(devs) != 1 {
  306. return "", fmt.Errorf("list ports of br got %v", dev)
  307. }
  308. bridgeName = dev
  309. dev = devs[0]
  310. } else {
  311. log.Errorf("br-exists %s get error %s", dev, err)
  312. // test if dev is port of bridge
  313. output, err := procutils.NewCommand("ovs-vsctl", "port-to-br", dev).Output()
  314. if err != nil && !strings.Contains(string(output), "no port named") {
  315. return "", errors.Wrapf(err, "port to br failed %s", output)
  316. } else if err == nil {
  317. bridgeName = strings.TrimSpace(string(output))
  318. }
  319. }
  320. if len(bridgeName) == 0 {
  321. bridgeName = "br"
  322. index := 0
  323. for {
  324. if _, err := net.InterfaceByName(bridgeName + strconv.Itoa(index)); err != nil {
  325. bridgeName = bridgeName + strconv.Itoa(index)
  326. break
  327. }
  328. index += 1
  329. }
  330. }
  331. log.Infof("bridge name %s", bridgeName)
  332. return fmt.Sprintf("%s/%s/%s", dev, bridgeName, netIp), nil
  333. }
  334. func (h *SHostInfo) parseConfig() error {
  335. mem := h.GetMemory()
  336. if mem < 64 { // MB
  337. return fmt.Errorf("Not enough memory!")
  338. }
  339. if len(options.HostOptions.Networks) == 0 {
  340. netConf, err := h.generateLocalNetworkConfig()
  341. if err != nil {
  342. return errors.Wrap(err, "generateLocalNetworkConfig")
  343. }
  344. log.Infof("Generate network config %s", netConf)
  345. options.HostOptions.Networks = []string{netConf}
  346. if len(options.HostOptions.Config) > 0 {
  347. if err = fileutils2.FilePutContents(
  348. options.HostOptions.Config,
  349. jsonutils.Marshal(options.HostOptions).YAMLString(),
  350. false,
  351. ); err != nil {
  352. log.Errorf("write config file failed %s", err)
  353. }
  354. }
  355. }
  356. for _, n := range options.HostOptions.Networks {
  357. nic, err := NewNIC(n)
  358. if err != nil {
  359. return errors.Wrapf(err, "NewNIC %s", n)
  360. }
  361. h.Nics = append(h.Nics, nic)
  362. }
  363. {
  364. // host local bridge
  365. nic, err := NewNIC(fmt.Sprintf("/%s/%s", options.HostOptions.HostLocalBridgeName, api.DEFAULT_HOST_LOCAL_WIRE_NAME))
  366. if err != nil {
  367. return errors.Wrapf(err, "NewNic for host local bridge %s", options.HostOptions.HostLocalBridgeName)
  368. }
  369. h.Nics = append(h.Nics, nic)
  370. }
  371. if len(options.HostOptions.ListenInterface) > 0 {
  372. h.MasterNic = netutils2.NewNetInterface(options.HostOptions.ListenInterface)
  373. if len(h.MasterNic.Addr) == 0 && len(h.MasterNic.Addr6) == 0 {
  374. return fmt.Errorf("Listen interface %s master have no IP", options.HostOptions.ListenInterface)
  375. }
  376. } else {
  377. // set MasterNic to the first NIC with IP
  378. h.MasterNic = nil
  379. for _, n := range h.Nics {
  380. if len(n.Ip) > 0 || len(n.Ip6) > 0 {
  381. h.MasterNic = netutils2.NewNetInterface(n.Bridge)
  382. }
  383. }
  384. if h.MasterNic == nil {
  385. return fmt.Errorf("No interface suitable to be master NIC")
  386. }
  387. }
  388. if h.MasterNic != nil {
  389. if regutils.MatchIP4Addr(h.GetMasterIp()) {
  390. options.HostOptions.Address = "0.0.0.0"
  391. } else {
  392. options.HostOptions.Address = "::"
  393. }
  394. }
  395. h.IsolatedDeviceMan = isolated_device.NewManager(h)
  396. return nil
  397. }
  398. func (h *SHostInfo) getIoSchedulerSupported(scheduler string, supportedSchedulers []string) (string, map[string]string) {
  399. // IoScheduler default to none scheduler
  400. ioParams := make(map[string]string, 0)
  401. switch scheduler {
  402. case "deadline":
  403. if utils.IsInStringArray("mq-deadline", supportedSchedulers) {
  404. scheduler = "mq-deadline"
  405. } else if utils.IsInStringArray("deadline", supportedSchedulers) {
  406. scheduler = "deadline"
  407. } else {
  408. scheduler = "none"
  409. }
  410. case "cfq":
  411. if utils.IsInStringArray("bfq", supportedSchedulers) {
  412. scheduler = "bfq"
  413. } else if utils.IsInStringArray("cfq", supportedSchedulers) {
  414. scheduler = "cfq"
  415. } else {
  416. scheduler = "none"
  417. }
  418. default:
  419. if !utils.IsInStringArray(scheduler, supportedSchedulers) {
  420. scheduler = "none"
  421. }
  422. }
  423. ioParams["queue/scheduler"] = scheduler
  424. switch scheduler {
  425. case "cfq":
  426. ioParams["queue/iosched/group_isolation"] = "1"
  427. ioParams["queue/iosched/slice_idle"] = "0"
  428. ioParams["queue/iosched/group_idle"] = "0"
  429. ioParams["queue/iosched/quantum"] = "32"
  430. }
  431. return scheduler, ioParams
  432. }
  433. func (h *SHostInfo) prepareEnv() error {
  434. if err := h.fixPathEnv(); err != nil {
  435. return errors.Wrap(err, "Fix path environment")
  436. }
  437. if options.HostOptions.ReportInterval > 300 {
  438. return fmt.Errorf("Option report_interval must no longer than 5 min")
  439. }
  440. for _, dirPath := range []string{
  441. options.HostOptions.ServersPath,
  442. options.HostOptions.MemorySnapshotsPath,
  443. options.HostOptions.LocalBackupTempPath,
  444. } {
  445. output, err := procutils.NewCommand("mkdir", "-p", dirPath).Output()
  446. if err != nil {
  447. return errors.Wrapf(err, "failed to create path %s: %s", dirPath, output)
  448. }
  449. }
  450. _, err := procutils.NewCommand("ethtool", "-h").Output()
  451. if err != nil {
  452. return errors.Wrap(err, "Execute 'ethtool -h'")
  453. }
  454. // setup tuned-adm
  455. _, err = procutils.NewRemoteCommandAsFarAsPossible("tuned-adm", "profile", "virtual-host").Output()
  456. if err != nil {
  457. log.Errorf("tuned-adm profile virtual-host fail: %s", err)
  458. }
  459. supportedSchedulers, _ := fileutils2.GetAllBlkdevsIoSchedulers()
  460. log.Infof("supported io schedulers %v", supportedSchedulers)
  461. // set hdd block devices io scheduler
  462. {
  463. hddIoScheduler, ioParams := h.getIoSchedulerSupported(options.HostOptions.BlockIoScheduler, supportedSchedulers)
  464. log.Infof("HDD I/O Scheduler switch to %s", hddIoScheduler)
  465. fileutils2.ChangeHddBlkdevsParams(ioParams)
  466. h.IoScheduler = hddIoScheduler
  467. }
  468. // set ssd block devices io scheduler
  469. {
  470. ssdIoScheduler, ioParams := h.getIoSchedulerSupported(options.HostOptions.SsdBlockIoScheduler, supportedSchedulers)
  471. log.Infof("SSD I/O Scheduler switch to %s", ssdIoScheduler)
  472. fileutils2.ChangeSsdBlkdevsParams(ioParams)
  473. }
  474. if !utils.IsInStringArray("tun", options.HostOptions.SkipCheckKernelMods) {
  475. _, err = procutils.NewRemoteCommandAsFarAsPossible("modprobe", "tun").Output()
  476. if err != nil {
  477. return errors.Wrap(err, "Failed to activate tun/tap device")
  478. }
  479. }
  480. output, err := procutils.NewRemoteCommandAsFarAsPossible("modprobe", "vhost_net").Output()
  481. if err != nil {
  482. log.Warningf("modprobe vhost_net error: %s", output)
  483. }
  484. if err := cgrouputils.Init(h.IoScheduler); err != nil {
  485. return fmt.Errorf("Cannot initialize control group subsystem: %s", err)
  486. }
  487. h.sysinfo.CgroupVersion = cgrouputils.GetCgroupVersion()
  488. // err = h.resetIptables()
  489. // if err != nil {
  490. // return err
  491. // }
  492. if options.HostOptions.EnableKsm && options.HostOptions.HugepagesOption == "disable" {
  493. h.EnableKsm(900)
  494. } else {
  495. h.DisableKsm()
  496. }
  497. switch options.HostOptions.HugepagesOption {
  498. case "disable":
  499. h.DisableHugepages()
  500. case "native":
  501. h.EnableNativeHugepages()
  502. hp, err := h.Mem.GetHugepages()
  503. if err != nil {
  504. return errors.Wrap(err, "MEM.GetHugepages")
  505. }
  506. for i := 0; i < len(hp); i++ {
  507. if hp[i].SizeKb == options.HostOptions.HugepageSizeMb*1024 {
  508. nr := hp[i].Total
  509. h.sysinfo.HugepageNr = &nr
  510. h.sysinfo.HugepageSizeKb = hp[i].SizeKb
  511. break
  512. }
  513. }
  514. if h.sysinfo.HugepageNr == nil || *h.sysinfo.HugepageNr == 0 {
  515. return errors.Errorf("hugepage %d nr 0", options.HostOptions.HugepageSizeMb)
  516. }
  517. case "transparent":
  518. h.EnableTransparentHugepages()
  519. default:
  520. return fmt.Errorf("Invalid hugepages option")
  521. }
  522. h.sysinfo.HugepagesOption = options.HostOptions.HugepagesOption
  523. h.PreventArpFlux()
  524. h.tuneSystem()
  525. return nil
  526. }
  527. func runDmidecode(hType string) (*types.SSystemInfo, error) {
  528. output, err := procutils.NewCommand("dmidecode", "-t", hType).Output()
  529. if err != nil {
  530. return &types.SSystemInfo{}, errors.Wrapf(err, "cmd: dmidecode -t %s, output: %s", hType, output)
  531. }
  532. info, err := sysutils.ParseDMISysinfo(strings.Split(string(output), "\n"))
  533. if err != nil {
  534. return &types.SSystemInfo{}, errors.Wrapf(err, "ParseDMISysinfo with line: %s", output)
  535. }
  536. return info, nil
  537. }
  538. func (h *SHostInfo) detectHostInfo() error {
  539. sysInfo, err := runDmidecode("1")
  540. if err != nil {
  541. log.Warningf("get system info error: %v", err)
  542. }
  543. h.sysinfo.SSystemInfo = sysInfo
  544. motherboardInfo, err := runDmidecode("2")
  545. if err != nil {
  546. log.Warningf("get motherboard info error: %v", err)
  547. }
  548. h.sysinfo.MotherboardInfo = motherboardInfo
  549. h.detectKvmModuleSupport()
  550. h.detectKVMMaxCpus()
  551. h.detectNestSupport()
  552. if err := h.detectSyssoftwareInfo(); err != nil {
  553. return err
  554. }
  555. h.detectStorageSystem()
  556. if options.HostOptions.EnableHostAgentNumaAllocate {
  557. h.sysinfo.HostAgentCpuNumaAllocate = true
  558. }
  559. topoInfo, err := hardware.GetTopology()
  560. if err != nil {
  561. return errors.Wrap(err, "Get hardware topology")
  562. }
  563. cpuInfo, err := hardware.GetCPU()
  564. if err != nil {
  565. return errors.Wrap(err, "Get CPU info")
  566. }
  567. h.sysinfo.Topology = topoInfo
  568. h.sysinfo.CPUInfo = cpuInfo
  569. if err = h.GetNodeHugepages(); err != nil {
  570. return errors.Wrap(err, "GetNodeHugepages")
  571. }
  572. if options.HostOptions.CheckSystemServices {
  573. if err := h.checkSystemServices(); err != nil {
  574. return err
  575. }
  576. }
  577. return nil
  578. }
  579. func (h *SHostInfo) checkSystemServices() error {
  580. funcEn := func(srv string, srvinst system_service.ISystemService) {
  581. if !srvinst.IsInstalled() {
  582. log.Warningf("Service %s not installed", srv)
  583. } else if !srvinst.IsActive() {
  584. srvinst.Start(false)
  585. }
  586. }
  587. if options.HostOptions.ManageNtpConfiguration {
  588. for _, srv := range []string{"ntpd"} {
  589. srvinst := system_service.GetService(srv)
  590. funcEn(srv, srvinst)
  591. }
  592. }
  593. svcs := os.Getenv("HOST_SYSTEM_SERVICES_OFF")
  594. for _, srv := range []string{"host_sdnagent", "host-deployer", "telegraf"} {
  595. srvinst := system_service.GetService(srv)
  596. if strings.Contains(svcs, srv) {
  597. if srvinst.IsActive() || srvinst.IsEnabled() {
  598. srvinst.Stop(true)
  599. }
  600. } else {
  601. funcEn(srv, srvinst)
  602. }
  603. }
  604. return nil
  605. }
  606. func (h *SHostInfo) detectStorageSystem() {
  607. stype, _ := sysutils.DetectStorageType()
  608. switch stype {
  609. case "hdd":
  610. stype = api.DISK_TYPE_ROTATE
  611. case "ssd":
  612. stype = api.DISK_TYPE_SSD
  613. case "hybird":
  614. stype = api.DISK_TYPE_HYBRID
  615. default:
  616. stype = ""
  617. }
  618. h.sysinfo.StorageType = stype
  619. }
  620. func (h *SHostInfo) fixPathEnv() error {
  621. var paths = []string{
  622. "/usr/bin", // usr bin at first for host container deploy
  623. "/usr/sbin",
  624. "/usr/local/sbin",
  625. "/usr/local/bin",
  626. "/sbin",
  627. "/bin",
  628. }
  629. return os.Setenv("PATH", strings.Join(paths, ":"))
  630. }
  631. func (h *SHostInfo) DisableHugepages() {
  632. kv := map[string]string{
  633. "/proc/sys/vm/nr_hugepages": "0",
  634. "/sys/kernel/mm/transparent_hugepage/enabled": "never",
  635. "/sys/kernel/mm/transparent_hugepage/defrag": "never",
  636. }
  637. for k, v := range kv {
  638. sysutils.SetSysConfig(k, v)
  639. }
  640. }
  641. func (h *SHostInfo) EnableTransparentHugepages() {
  642. kv := map[string]string{
  643. "/proc/sys/vm/nr_hugepages": "0",
  644. "/sys/kernel/mm/transparent_hugepage/enabled": "always",
  645. "/sys/kernel/mm/transparent_hugepage/defrag": "always",
  646. }
  647. for k, v := range kv {
  648. sysutils.SetSysConfig(k, v)
  649. }
  650. }
  651. func (h *SHostInfo) GetNodeHugepages() error {
  652. if options.HostOptions.HugepagesOption != "native" {
  653. return nil
  654. }
  655. hugepageSizeKB := h.sysinfo.HugepageSizeKb
  656. nodeHugepages := make([]hostapi.HostNodeHugepageNr, len(h.sysinfo.Topology.Nodes))
  657. for i := range h.sysinfo.Topology.Nodes {
  658. nodeId := h.sysinfo.Topology.Nodes[i].ID
  659. nodeHugepagePath := fmt.Sprintf("/sys/devices/system/node/node%d/hugepages/hugepages-%dkB", nodeId, hugepageSizeKB)
  660. if !fileutils2.Exists(nodeHugepagePath) {
  661. return errors.Errorf("node %s has no hugepages ?", nodeHugepagePath)
  662. }
  663. nrHugepage, err := fileutils2.FileGetIntContent(path.Join(nodeHugepagePath, "nr_hugepages"))
  664. if err != nil {
  665. return errors.Wrap(err, "get node nr hugepage")
  666. }
  667. nodeHugepages[i].NodeId = nodeId
  668. nodeHugepages[i].HugepageNr = nrHugepage
  669. }
  670. h.sysinfo.NodeHugepages = nodeHugepages
  671. return nil
  672. }
  673. func (h *SHostInfo) GetMemory() int {
  674. return h.Mem.Total
  675. }
  676. func (h *SHostInfo) GetMemoryTotal() int {
  677. if h.Mem.MemInfo == nil {
  678. return h.Mem.MemInfo.Total
  679. }
  680. return h.Mem.Total
  681. }
  682. /* func (h *SHostInfo) getCurrentHugepageNr() (int64, error) {
  683. nrStr, err := fileutils2.FileGetContents("/proc/sys/vm/nr_hugepages")
  684. if err != nil {
  685. return 0, errors.Wrap(err, "file get content nr hugepages")
  686. }
  687. nr, err := strconv.Atoi(strings.TrimSpace(nrStr))
  688. if err != nil {
  689. return 0, errors.Wrap(err, "nr str atoi")
  690. }
  691. return int64(nr), nil
  692. } */
  693. func (h *SHostInfo) EnableNativeHugepages() {
  694. kv := map[string]string{
  695. "/sys/kernel/mm/transparent_hugepage/enabled": "never",
  696. "/sys/kernel/mm/transparent_hugepage/defrag": "never",
  697. }
  698. for k, v := range kv {
  699. sysutils.SetSysConfig(k, v)
  700. }
  701. }
  702. func (h *SHostInfo) EnableKsm(sleepSec int) {
  703. sysutils.SetSysConfig("/sys/kernel/mm/ksm/run", "1")
  704. sysutils.SetSysConfig("/sys/kernel/mm/ksm/sleep_millisecs",
  705. fmt.Sprintf("%d", sleepSec*1000))
  706. h.sysinfo.EnableKsm = true
  707. }
  708. func (h *SHostInfo) DisableKsm() {
  709. sysutils.SetSysConfig("/sys/kernel/mm/ksm/run", "0")
  710. h.sysinfo.EnableKsm = false
  711. }
  712. func (h *SHostInfo) PreventArpFlux() {
  713. sysutils.SetSysConfig("/proc/sys/net/ipv4/conf/all/arp_filter", "1")
  714. }
  715. // Any system wide optimizations
  716. // set swappiness=0 to avoid swap
  717. // set vfs_cache_pressure=300 to avoid stale pagecache
  718. func (h *SHostInfo) tuneSystem() {
  719. minMemMb := h.getKubeReservedMemMb()
  720. if minMemMb < 100 {
  721. minMemMb = 100
  722. }
  723. minMemKB := 2 * minMemMb * 1024
  724. kv := map[string]string{
  725. "/proc/sys/vm/swappiness": "0",
  726. "/proc/sys/vm/vfs_cache_pressure": "350",
  727. "/proc/sys/net/ipv4/tcp_mtu_probing": "2",
  728. "/proc/sys/net/ipv4/neigh/default/gc_thresh1": "1024",
  729. "/proc/sys/net/ipv4/neigh/default/gc_thresh2": "4096",
  730. "/proc/sys/net/ipv4/neigh/default/gc_thresh3": "8192",
  731. "/sys/module/kvm/parameters/ignore_msrs": "1",
  732. "/sys/module/kvm/parameters/report_ignored_msrs": "0",
  733. "/proc/sys/net/netfilter/nf_conntrack_tcp_be_liberal": "1",
  734. }
  735. ret, err := fileutils2.FileGetIntContent("/proc/sys/vm/min_free_kbytes")
  736. if err != nil {
  737. log.Errorf("failed get /proc/sys/vm/min_free_kbytes: %s", err)
  738. } else if ret < minMemKB {
  739. kv["/proc/sys/vm/min_free_kbytes"] = fmt.Sprintf("%d", minMemKB)
  740. }
  741. for k, v := range kv {
  742. sysutils.SetSysConfig(k, v)
  743. }
  744. }
  745. /*func (h *SHostInfo) resetIptables() error {
  746. for _, tbl := range []string{"filter", "nat", "mangle"} {
  747. output, err := procutils.NewCommand("iptables", "-t", tbl, "-F").Output()
  748. if err != nil {
  749. return errors.Wrapf(err, "fail to clean NAT iptables: %s", output)
  750. }
  751. }
  752. return nil
  753. }*/
  754. func (h *SHostInfo) initCgroup() error {
  755. reservedCpus := cpuset.NewCPUSet()
  756. if h.reservedCpusInfo != nil {
  757. var err error
  758. reservedCpus, err = cpuset.Parse(h.reservedCpusInfo.Cpus)
  759. if err != nil {
  760. return errors.Wrap(err, "failed parse reserved cpus")
  761. }
  762. }
  763. hostCpusetBuilder := cpuset.NewBuilder()
  764. for i := 0; i < h.Cpu.CpuCount; i++ {
  765. if reservedCpus.Contains(i) {
  766. continue
  767. }
  768. hostCpusetBuilder.Add(i)
  769. }
  770. hostCpuset := hostCpusetBuilder.Result()
  771. hostCpusetStr := hostCpuset.String()
  772. // init host cpuset root group
  773. if !cgrouputils.NewCGroupCPUSetTask("", hostconsts.HOST_CGROUP, hostCpusetStr, "").Configure() {
  774. return fmt.Errorf("failed init host root cpuset")
  775. }
  776. // init host cpu root group
  777. cgrouputils.NewCGroupCPUTask("", hostconsts.HOST_CGROUP, hostCpuset.Size()*1024).SetTask()
  778. if h.reservedCpusInfo != nil {
  779. reservedCpusTask := cgrouputils.NewCGroupCPUSetTask("", hostconsts.HOST_RESERVED_CPUSET, h.reservedCpusInfo.Cpus, h.reservedCpusInfo.Mems)
  780. if !reservedCpusTask.Configure() {
  781. return fmt.Errorf("failed init host reserved cpuset %s", h.reservedCpusInfo.Cpus)
  782. }
  783. if h.reservedCpusInfo.DisableSchedLoadBalance != nil &&
  784. *h.reservedCpusInfo.DisableSchedLoadBalance &&
  785. !reservedCpusTask.CustomConfig(cgrouputils.CPUSET_SCHED_LOAD_BALANCE, "0") {
  786. return fmt.Errorf("failed init host reserved cpuset sched load balance")
  787. }
  788. if len(h.reservedCpusInfo.ProcessesPrefix) > 0 {
  789. go h.startBindReservedCpus(h.reservedCpusInfo.ProcessesPrefix)
  790. }
  791. }
  792. return nil
  793. }
  794. func (h *SHostInfo) detectKvmModuleSupport() string {
  795. h.sysinfo.KvmModule = sysutils.GetKVMModuleSupport()
  796. return h.sysinfo.KvmModule
  797. }
  798. func (h *SHostInfo) detectNestSupport() {
  799. if sysutils.IsNestEnabled() {
  800. h.sysinfo.Nest = "enabled"
  801. } else {
  802. h.sysinfo.Nest = "disabled"
  803. }
  804. }
  805. func (h *SHostInfo) detectOsDist() {
  806. files, err := procutils.NewRemoteCommandAsFarAsPossible("sh", "-c", "ls /etc/*elease").Output()
  807. if err != nil {
  808. log.Errorln(err)
  809. return
  810. }
  811. re := regexp.MustCompile(`(.+) release ([\d.]+)[^(]*(?:\((.+)\))?`)
  812. for _, file := range strings.Split(string(files), "\n") {
  813. content, err := procutils.NewRemoteCommandAsFarAsPossible("cat", file).Output()
  814. if err != nil {
  815. log.Errorln(err)
  816. continue
  817. }
  818. m := re.FindStringSubmatch(string(content))
  819. if len(m) == 4 {
  820. h.sysinfo.OsDistribution = m[1]
  821. h.sysinfo.OsVersion = m[2]
  822. break
  823. }
  824. }
  825. log.Infof("DetectOsDist %s %s", h.sysinfo.OsDistribution, h.sysinfo.OsVersion)
  826. if len(h.sysinfo.OsDistribution) == 0 {
  827. log.Errorln("Failed to detect distribution info")
  828. content, err := procutils.NewRemoteCommandAsFarAsPossible("cat", "/etc/os-release").Output()
  829. if err != nil {
  830. log.Errorln(err)
  831. }
  832. for _, line := range strings.Split(string(content), "\n") {
  833. line = strings.TrimSpace(line)
  834. if strings.HasPrefix(line, "ID=") {
  835. h.sysinfo.OsDistribution = line[3:]
  836. continue
  837. }
  838. if strings.HasPrefix(line, "VERSION=") {
  839. h.sysinfo.OsVersion = strings.Trim(line[8:], "\"")
  840. continue
  841. }
  842. }
  843. }
  844. if utils.IsInStringArray(strings.ToLower(h.sysinfo.OsDistribution), []string{"uos", "debian", "ubuntu"}) {
  845. if err := procutils.NewRemoteCommandAsFarAsPossible("systemctl", "cat", "--", "openvswitch").Run(); err != nil {
  846. log.Warningf("system_service.SetOpenvswitchName to openvswitch-switch")
  847. system_service.SetOpenvswitchName("openvswitch-switch")
  848. }
  849. }
  850. }
  851. func (h *SHostInfo) detectKernelVersion() {
  852. out, err := procutils.NewCommand("uname", "-r").Output()
  853. if err != nil {
  854. log.Errorf("detectKernelVersion error: %v", err)
  855. }
  856. h.sysinfo.KernelVersion = strings.TrimSpace(string(out))
  857. }
  858. func (h *SHostInfo) detectSyssoftwareInfo() error {
  859. h.detectOsDist()
  860. h.detectKernelVersion()
  861. if !h.IsContainerHost() {
  862. if err := h.detectQemuVersion(); err != nil {
  863. log.Errorf("detect qemu version: %s", err.Error())
  864. h.AppendHostError(fmt.Sprintf("detect qemu version: %s", err.Error()))
  865. }
  866. }
  867. h.detectOvsVersion()
  868. if err := h.detectOvsKOVersion(); err != nil {
  869. log.Errorf("detect ovs kernel version: %s", err.Error())
  870. h.AppendHostError(fmt.Sprintf("detect ovs kernel version: %s", err.Error()))
  871. }
  872. return nil
  873. }
  874. func (h *SHostInfo) detectQemuVersion() error {
  875. if len(qemutils.GetQemu("")) == 0 {
  876. return fmt.Errorf("Qemu not installed")
  877. }
  878. out, err := procutils.NewRemoteCommandAsFarAsPossible(qemutils.GetQemu(""), "-version").Output()
  879. if err != nil {
  880. return errors.Errorf("exec qemu version failed %s", out)
  881. }
  882. cmd := qemutils.GetQemu(options.HostOptions.DefaultQemuVersion)
  883. version, err := procutils.NewRemoteCommandAsFarAsPossible(cmd, "--version").Output()
  884. if err != nil {
  885. log.Errorln(err)
  886. return err
  887. } else {
  888. versions := strings.Split(string(version), "\n")
  889. parts := strings.Split(versions[0], " ")
  890. var v = parts[len(parts)-1]
  891. if strings.HasPrefix(parts[len(parts)-1], "(") {
  892. v = parts[len(parts)-2]
  893. }
  894. if len(v) > 0 {
  895. log.Infof("Detect qemu version is %s", v)
  896. h.sysinfo.QemuVersion = v
  897. } else {
  898. return fmt.Errorf("Failed to detect qemu version")
  899. }
  900. }
  901. return h.detectQemuCapabilities(h.sysinfo.QemuVersion)
  902. }
  903. const (
  904. KVM_GET_API_VERSION = uintptr(44544)
  905. KVM_CREATE_VM = uintptr(44545)
  906. KVM_CHECK_EXTENSION = uintptr(44547)
  907. KVM_CAP_NR_VCPUS = 9
  908. KVM_CAP_MAX_VCPUS = 66
  909. // TODO: arm mem ipa size for max memsize
  910. // KVM_CAP_ARM_VM_IPA_SIZE
  911. )
  912. func (h *SHostInfo) detectKVMMaxCpus() error {
  913. ioctl := func(fd, op, arg uintptr) (uintptr, uintptr, syscall.Errno) {
  914. return syscall.Syscall(syscall.SYS_IOCTL, fd, op, arg)
  915. }
  916. kvm, err := syscall.Open("/dev/kvm", syscall.O_RDONLY, 0644)
  917. if err != nil {
  918. return errors.Wrap(err, "failed open /dev/kvm")
  919. }
  920. defer syscall.Close(kvm)
  921. r, _, errno := ioctl(uintptr(kvm), KVM_GET_API_VERSION, uintptr(0))
  922. if errno != 0 {
  923. return errors.Errorf("get api version: %d", errno)
  924. }
  925. log.Infof("KVM API VERSION %d", r)
  926. r, _, errno = ioctl(uintptr(kvm), KVM_CHECK_EXTENSION, uintptr(KVM_CAP_MAX_VCPUS))
  927. if errno != 0 {
  928. return errors.Errorf("kvm check extension KVM_CAP_MAX_VCPUS errno: %d", errno)
  929. }
  930. log.Infof("KVM CAP MAX VCPUS: %d", r)
  931. if r > 0 {
  932. h.kvmMaxCpus = uint(r)
  933. }
  934. r, _, errno = ioctl(uintptr(kvm), KVM_CHECK_EXTENSION, uintptr(KVM_CAP_NR_VCPUS))
  935. if errno != 0 {
  936. return errors.Errorf("kvm check extension KVM_CAP_NR_VCPUS errno: %d", errno)
  937. }
  938. log.Infof("KVM CAP NR VCPUS: %d", r)
  939. if r > 0 && (h.kvmMaxCpus == 0 || h.kvmMaxCpus > uint(r)) {
  940. h.kvmMaxCpus = uint(r)
  941. }
  942. // kernel doc: If the KVM_CAP_NR_VCPUS does not exist
  943. // you should assume that max_vcpus is 4 cpus max.
  944. if h.kvmMaxCpus == 0 {
  945. h.kvmMaxCpus = 4
  946. }
  947. return nil
  948. }
// QemuCaps is the cached result of a QEMU capability probe. It is
// serialized to and loaded from the "qemu_caps" file under
// options.HostOptions.ServersPath by detectQemuCapabilities / loadQemuCaps.
type QemuCaps struct {
	// QemuVersion is the QEMU version the cached data was probed from; the
	// cache is only reused when it equals the version being detected.
	QemuVersion string
	// MachineInfoList is the list unmarshalled from the "return" field of the
	// QMP query-machines reply.
	MachineInfoList []monitor.MachineInfo
}
  953. func (h *SHostInfo) loadQemuCaps(capsPath string) (*QemuCaps, error) {
  954. if fileutils2.Exists(capsPath) {
  955. caps, err := fileutils2.FileGetContents(capsPath)
  956. if err != nil {
  957. log.Errorf("failed get qemu caps: %s", err)
  958. return nil, err
  959. }
  960. qemuCaps := new(QemuCaps)
  961. jCaps, err := jsonutils.ParseString(caps)
  962. if err != nil {
  963. log.Errorf("failed parse qemu caps: %s", err)
  964. return nil, err
  965. }
  966. err = jCaps.Unmarshal(qemuCaps)
  967. if err != nil {
  968. log.Errorf("failed unmarshal qemu caps: %s", err)
  969. return nil, err
  970. }
  971. return qemuCaps, nil
  972. }
  973. return nil, nil
  974. }
  975. func (h *SHostInfo) detectQemuCapabilities(version string) error {
  976. capsPath := path.Join(options.HostOptions.ServersPath, "qemu_caps")
  977. caps, err := h.loadQemuCaps(capsPath)
  978. if err == nil && caps != nil && caps.QemuVersion == version {
  979. h.qemuMachineInfoList = caps.MachineInfoList
  980. return nil
  981. }
  982. qmpCmds := fmt.Sprintf(`echo "{'execute': 'qmp_capabilities'}
  983. {'execute': 'query-machines','id':'query_machines'}
  984. {'execute': 'quit'}" | %s -qmp stdio -vnc none -machine none -display none`, qemutils.GetQemu(version))
  985. log.Debugf("qemu caps cmdline %v", qmpCmds)
  986. out, err := procutils.NewRemoteCommandAsFarAsPossible("sh", "-c", qmpCmds).Output()
  987. if err != nil {
  988. log.Errorf("failed start qemu caps cmdline: %s", qmpCmds)
  989. }
  990. segs := bytes.Split(out, []byte{'\n'})
  991. for _, seg := range segs {
  992. res, err := jsonutils.Parse(bytes.TrimSpace(seg))
  993. if err != nil {
  994. return errors.Errorf("Unmarshal %s error: %s", seg, err)
  995. }
  996. id, _ := res.GetString("id")
  997. if id == "query_machines" {
  998. var machineInfoList = make([]monitor.MachineInfo, 0)
  999. err = res.Unmarshal(&machineInfoList, "return")
  1000. if err != nil {
  1001. return errors.Wrapf(err, "failed unmarshal machineinfo return %s", res.PrettyString())
  1002. }
  1003. h.qemuMachineInfoList = machineInfoList
  1004. qemuCaps := &QemuCaps{
  1005. QemuVersion: version,
  1006. MachineInfoList: machineInfoList,
  1007. }
  1008. return fileutils2.FilePutContents(capsPath, jsonutils.Marshal(qemuCaps).String(), false)
  1009. }
  1010. }
  1011. return errors.Errorf("failed parse qemu machine info: %s", out)
  1012. }
  1013. func (h *SHostInfo) GetQemuMachineInfoList() []monitor.MachineInfo {
  1014. return h.qemuMachineInfoList
  1015. }
  1016. func (h *SHostInfo) GetKVMMaxCpus() uint {
  1017. return h.kvmMaxCpus
  1018. }
  1019. func (h *SHostInfo) detectOvsVersion() {
  1020. version, err := procutils.NewCommand("ovs-vsctl", "--version").Output()
  1021. if err != nil {
  1022. log.Errorln(err)
  1023. } else {
  1024. versions := strings.Split(string(version), "\n")
  1025. parts := strings.Split(versions[0], " ")
  1026. v := parts[len(parts)-1]
  1027. if len(v) > 0 {
  1028. log.Infof("Detect OVS version is %s", v)
  1029. h.sysinfo.OvsVersion = v
  1030. } else {
  1031. log.Errorln("Failed to detect ovs version")
  1032. }
  1033. }
  1034. }
  1035. func (h *SHostInfo) detectOvsKOVersion() error {
  1036. output, err := procutils.NewRemoteCommandAsFarAsPossible("modinfo", "openvswitch").Output()
  1037. if err != nil {
  1038. return errors.Errorf("modinfo openvswitch failed %s", output)
  1039. }
  1040. lines := strings.Split(string(output), "\n")
  1041. for i := 0; i < len(lines); i++ {
  1042. line := lines[i]
  1043. if strings.HasPrefix(line, "version:") || strings.HasPrefix(line, "vermagic:") {
  1044. log.Infof("kernel module openvswitch %s", line)
  1045. parts := strings.Split(line, ":")
  1046. if len(parts) > 1 {
  1047. h.sysinfo.OvsKmodVersion = strings.TrimSpace(parts[1])
  1048. }
  1049. return nil
  1050. }
  1051. }
  1052. return errors.Errorf("kernel module openvswitch paramters version not found, is kernel version correct ??")
  1053. }
  1054. func (h *SHostInfo) GetMasterNicIpAndMask() (string, int) {
  1055. if h.MasterNic.Addr != "" {
  1056. mask, _ := h.MasterNic.Mask.Size()
  1057. return h.MasterNic.Addr, mask
  1058. }
  1059. mask, _ := h.MasterNic.Mask6.Size()
  1060. return h.MasterNic.Addr6, mask
  1061. }
  1062. func (h *SHostInfo) GetMasterIp() string {
  1063. if h.MasterNic == nil {
  1064. return ""
  1065. }
  1066. if h.MasterNic.Addr != "" {
  1067. return h.MasterNic.Addr
  1068. }
  1069. return h.MasterNic.Addr6
  1070. }
  1071. func (h *SHostInfo) GetMasterMac() string {
  1072. return h.getMasterMacWithRefresh(false)
  1073. }
  1074. func (h *SHostInfo) getMasterMacWithRefresh(refresh bool) string {
  1075. if refresh {
  1076. h.MasterNic.FetchConfig()
  1077. }
  1078. return h.MasterNic.GetMac()
  1079. }
  1080. func (h *SHostInfo) getMatchNic(mac string, vlanId int) *SNIC {
  1081. for _, nic := range h.Nics {
  1082. if nic.BridgeDev.GetMac() == mac && nic.BridgeDev.GetVlanId() == vlanId {
  1083. return nic
  1084. }
  1085. }
  1086. return nil
  1087. }
  1088. func (h *SHostInfo) StartRegister(delay int) {
  1089. time.Sleep(time.Duration(delay) * time.Second)
  1090. h.register()
  1091. }
  1092. func (h *SHostInfo) reportHostErrors() {
  1093. var errs = []api.HostError{}
  1094. for _, v := range h.SysError {
  1095. errs = append(errs, v...)
  1096. }
  1097. data := jsonutils.NewDict()
  1098. data.Set(api.HOSTMETA_HOST_ERRORS, jsonutils.Marshal(errs))
  1099. _, err := modules.Hosts.SetMetadata(h.GetSession(), h.HostId, data)
  1100. if err != nil {
  1101. log.Errorf("failed sync host errors %s", err)
  1102. }
  1103. }
  1104. func (h *SHostInfo) register() {
  1105. if h.isRegistered {
  1106. return
  1107. }
  1108. hostInfo, err := h.initHostRecord()
  1109. if err != nil {
  1110. h.onFail(errors.Wrap(err, "initHostRecords"))
  1111. return
  1112. }
  1113. defer h.reportHostErrors()
  1114. eg := errgroup.Group{}
  1115. eg.Go(func() error {
  1116. if e := h.initCgroup(); e != nil {
  1117. return errors.Wrap(e, "initCgroup")
  1118. }
  1119. return nil
  1120. })
  1121. eg.Go(func() error {
  1122. if e := h.initHostNetworks(hostInfo); e != nil {
  1123. return errors.Wrap(e, "initHostNetworks")
  1124. }
  1125. return nil
  1126. })
  1127. eg.Go(func() error {
  1128. if e := h.initIsolatedDevices(); e != nil {
  1129. return errors.Wrap(e, "initIsolatedDevices")
  1130. }
  1131. return nil
  1132. })
  1133. eg.Go(func() error {
  1134. if e := h.initStorages(); e != nil {
  1135. return errors.Wrap(e, "initStorages")
  1136. }
  1137. return nil
  1138. })
  1139. if err = eg.Wait(); err != nil {
  1140. h.onFail(err)
  1141. return
  1142. }
  1143. err = h.finalizeNetworkSetup(context.Background())
  1144. if err != nil {
  1145. h.onFail(errors.Wrap(err, "finalizeNetworkSetup"))
  1146. return
  1147. }
  1148. if err := h.initHostFiles(); err != nil {
  1149. log.Errorf("initHostFiles failed: %s", err)
  1150. } else {
  1151. log.Infof("initHostFiles success")
  1152. }
  1153. h.deployAdminAuthorizedKeys()
  1154. h.onSucc()
  1155. }
  1156. func (h *SHostInfo) onFail(reason error) {
  1157. if len(h.HostId) > 0 && !h.isLoged {
  1158. logclient.AddSimpleActionLog(h, logclient.ACT_ONLINE, reason, hostutils.GetComputeSession(context.Background()).GetToken(), false)
  1159. data := jsonutils.NewDict()
  1160. data.Add(jsonutils.NewString(h.GetName()), "name")
  1161. data.Add(jsonutils.NewString(fmt.Sprintf("register failed: %v", reason)), "message")
  1162. notifyclient.SystemExceptionNotify(context.TODO(), napi.ActionSystemException, napi.TOPIC_RESOURCE_HOST, data)
  1163. h.isLoged = true
  1164. }
  1165. log.Errorf("register failed: %s", reason)
  1166. if h.kubeletConfig != nil {
  1167. // run in container, exit
  1168. panic("exit immediately for retry...")
  1169. } else {
  1170. // retry
  1171. log.Errorf("register failed, try 30 seconds later...")
  1172. h.StartRegister(30)
  1173. }
  1174. }
  1175. func (h *SHostInfo) initHostRecord() (*api.HostDetails, error) {
  1176. wireId, err := h.ensureMasterNetworks()
  1177. if err != nil {
  1178. return nil, errors.Wrap(err, "initHostRecord")
  1179. }
  1180. err = h.waitMasterNicIp()
  1181. if err != nil {
  1182. return nil, errors.Wrap(err, "waitMasterNicIp")
  1183. }
  1184. h.ZoneId, err = h.getZoneByWire(wireId)
  1185. if err != nil {
  1186. return nil, errors.Wrap(err, "getZoneByWire")
  1187. }
  1188. err = h.initZoneInfo(h.ZoneId)
  1189. if err != nil {
  1190. return nil, errors.Wrap(err, "initZoneInfo")
  1191. }
  1192. hostInfo, err := h.ensureHostRecord(h.ZoneId)
  1193. if err != nil {
  1194. return nil, errors.Wrap(err, "ensureHostRecord")
  1195. }
  1196. h.HostId = hostInfo.Id
  1197. h.cpuCmtBound = hostInfo.CpuCommitBound
  1198. h.memCmtBound = hostInfo.MemCommitBound
  1199. hostInfo, err = h.updateHostMetadata(hostInfo.Name)
  1200. if err != nil {
  1201. return nil, errors.Wrap(err, "updateHostMetadata")
  1202. }
  1203. // set auto migrate on host down
  1204. if hostInfo.AutoMigrateOnHostDown {
  1205. if err = h.SetOnHostDown(hostconsts.SHUTDOWN_SERVERS); err != nil {
  1206. return nil, errors.Wrap(err, "failed set on host down")
  1207. }
  1208. }
  1209. log.Infof("host health manager on host down %s", h.onHostDown)
  1210. // fetch host reserved cpus info
  1211. err = h.parseReservedCpusInfo(hostInfo)
  1212. if err != nil {
  1213. return nil, errors.Wrap(err, "parse reserved cpus info")
  1214. }
  1215. // enable numa allocate
  1216. if hostInfo.EnableNumaAllocate {
  1217. h.enableNumaAllocate = true
  1218. log.Infof("host enabled numa allocate")
  1219. }
  1220. // set host reserved memory
  1221. if h.IsHugepagesEnabled() && h.GetReservedMemMb() != hostInfo.MemReserved {
  1222. if err = h.updateHostReservedMem(h.GetReservedMemMb()); err != nil {
  1223. return nil, errors.Wrap(err, "updateHostReservedMem")
  1224. }
  1225. }
  1226. return hostInfo, nil
  1227. }
  1228. // try to create network on region.
  1229. func (h *SHostInfo) tryCreateNetworkOnWire() (string, error) {
  1230. masterIp, mask := h.GetMasterNicIpAndMask()
  1231. log.Infof("Get master ip %s and mask %d", masterIp, mask)
  1232. if len(masterIp) == 0 || mask == 0 {
  1233. return "", errors.Wrapf(httperrors.ErrInvalidStatus, "master ip %s mask %d", masterIp, mask)
  1234. }
  1235. params := jsonutils.NewDict()
  1236. params.Set("ip", jsonutils.NewString(masterIp))
  1237. params.Set("mask", jsonutils.NewInt(int64(mask)))
  1238. params.Set("is_classic", jsonutils.JSONTrue)
  1239. params.Set("server_type", jsonutils.NewString(string(api.NETWORK_TYPE_BAREMETAL)))
  1240. params.Set("is_on_premise", jsonutils.JSONTrue)
  1241. ret, err := modules.Networks.PerformClassAction(h.GetSession(), "try-create-network", params)
  1242. if err != nil {
  1243. return "", errors.Wrap(err, "try create network")
  1244. }
  1245. if !jsonutils.QueryBoolean(ret, "find_matched", false) {
  1246. return "", errors.Wrap(httperrors.ErrInvalidStatus, "try create network: find_matched == false")
  1247. }
  1248. wireId, err := ret.GetString("wire_id")
  1249. if err != nil {
  1250. return "", errors.Wrap(err, "try create network: get wire_id")
  1251. }
  1252. return wireId, nil
  1253. }
  1254. func (h *SHostInfo) ensureMasterNetworks() (string, error) {
  1255. masterIp := h.GetMasterIp()
  1256. if len(masterIp) == 0 {
  1257. return "", errors.Wrap(httperrors.ErrInvalidStatus, "master ip not found")
  1258. }
  1259. log.Infof("Master ip %s to fetch wire", masterIp)
  1260. params := jsonutils.NewDict()
  1261. params.Set("ip", jsonutils.NewString(masterIp))
  1262. params.Set("is_classic", jsonutils.JSONTrue)
  1263. params.Set("provider", jsonutils.NewString(api.CLOUD_PROVIDER_ONECLOUD))
  1264. params.Set("scope", jsonutils.NewString("system"))
  1265. params.Set("limit", jsonutils.NewInt(0))
  1266. // use default vpc
  1267. params.Set("vpc", jsonutils.NewString(api.DEFAULT_VPC_ID))
  1268. res, err := modules.Networks.List(h.GetSession(), params)
  1269. if err != nil {
  1270. return "", errors.Wrap(err, "fetch network by master ip")
  1271. }
  1272. var wireId string
  1273. if len(res.Data) == 0 {
  1274. wireId, err = h.tryCreateNetworkOnWire()
  1275. } else if len(res.Data) == 1 {
  1276. wireId, _ = res.Data[0].GetString("wire_id")
  1277. } else {
  1278. err = errors.Wrapf(httperrors.ErrConflict, "find multiple match network (%d) for access network", len(res.Data))
  1279. }
  1280. return wireId, err
  1281. }
  1282. func (h *SHostInfo) getZoneByWire(wireId string) (string, error) {
  1283. wire, err := hostutils.GetWireInfo(context.Background(), wireId)
  1284. if err != nil {
  1285. return "", errors.Wrap(err, "getWireInfo")
  1286. }
  1287. zoneId, err := wire.GetString("zone_id")
  1288. if err != nil {
  1289. return "", errors.Wrapf(err, "fail to get zone_id in wire info %s", wire)
  1290. }
  1291. return zoneId, nil
  1292. }
  1293. func (h *SHostInfo) GetSession() *mcclient.ClientSession {
  1294. return hostutils.GetComputeSession(context.Background())
  1295. }
  1296. func (h *SHostInfo) initZoneInfo(zoneId string) error {
  1297. log.Infof("Start GetZoneInfo %s", zoneId)
  1298. var params = jsonutils.NewDict()
  1299. params.Set("provider", jsonutils.NewString(api.CLOUD_PROVIDER_ONECLOUD))
  1300. res, err := modules.Zones.Get(h.GetSession(), zoneId, params)
  1301. if err != nil {
  1302. return errors.Wrap(err, "Zones.Get")
  1303. }
  1304. zone := api.ZoneDetails{}
  1305. jsonutils.Update(&zone, res)
  1306. h.Zone = zone.Name
  1307. h.ZoneId = zone.Id
  1308. h.Cloudregion = zone.Cloudregion
  1309. h.CloudregionId = zone.CloudregionId
  1310. h.ZoneManagerUri = zone.ManagerUri
  1311. if len(h.Zone) == 0 {
  1312. return errors.Wrapf(httperrors.ErrInvalidStatus, "failed to found zone with id %s", zoneId)
  1313. }
  1314. consts.SetZone(zone.Name)
  1315. return nil
  1316. }
  1317. func (h *SHostInfo) waitMasterNicIp() error {
  1318. const maxWaitSeconds = 900
  1319. waitSeconds := 0
  1320. for h.MasterNic.Addr == "" && h.MasterNic.Addr6 == "" && waitSeconds < maxWaitSeconds {
  1321. time.Sleep(time.Second)
  1322. waitSeconds++
  1323. h.MasterNic.FetchConfig()
  1324. }
  1325. if h.MasterNic.Addr == "" && h.MasterNic.Addr6 == "" {
  1326. return errors.Wrap(httperrors.ErrInvalidStatus, "fail to fetch master nic IP address")
  1327. }
  1328. if h.MasterNic.GetMac() == "" {
  1329. return errors.Wrap(httperrors.ErrInvalidStatus, "fail to fetch master nic MAC address")
  1330. }
  1331. return nil
  1332. }
  1333. func (h *SHostInfo) ensureHostRecord(zoneId string) (*api.HostDetails, error) {
  1334. allMasterMacs := h.MasterNic.GetAllMacs()
  1335. log.Infof("Master MAC: %s", strings.Join(allMasterMacs, ","))
  1336. params := jsonutils.NewDict()
  1337. params.Set("provider", jsonutils.NewString(api.CLOUD_PROVIDER_ONECLOUD))
  1338. params.Set("details", jsonutils.JSONTrue)
  1339. params.Set("scope", jsonutils.NewString("system"))
  1340. hosts := []api.HostDetails{}
  1341. for _, masterMac := range allMasterMacs {
  1342. params.Set("any_mac", jsonutils.NewString(masterMac))
  1343. res, err := modules.Hosts.List(h.GetSession(), params)
  1344. if err != nil {
  1345. return nil, errors.Wrap(err, "Hosts.List")
  1346. }
  1347. if len(res.Data) > 0 {
  1348. jsonutils.Update(&hosts, res.Data)
  1349. break
  1350. }
  1351. }
  1352. if len(hosts) > 1 {
  1353. for i := range hosts {
  1354. h.HostId = hosts[i].Id
  1355. logclient.AddSimpleActionLog(h, logclient.ACT_ONLINE, fmt.Errorf("duplicate host with %s", params), hostutils.GetComputeSession(context.Background()).GetToken(), false)
  1356. }
  1357. return nil, errors.Wrapf(httperrors.ErrConflict, "find multiple hosts match access mac %s", strings.Join(allMasterMacs, ","))
  1358. }
  1359. h.HostId = ""
  1360. if len(hosts) == 1 {
  1361. h.Domain_id = hosts[0].DomainId
  1362. h.HostId = hosts[0].Id
  1363. h.Project_domain = strings.ReplaceAll(hosts[0].ProjectDomain, " ", "+")
  1364. // 上次未能正常offline, 补充一次健康日志
  1365. if hosts[0].HostStatus == api.HOST_ONLINE {
  1366. reason := "The host status is online when it staring. Maybe the control center was down earlier"
  1367. logclient.AddSimpleActionLog(h, logclient.ACT_HEALTH_CHECK, map[string]string{"reason": reason}, hostutils.GetComputeSession(context.Background()).GetToken(), false)
  1368. data := jsonutils.NewDict()
  1369. data.Add(jsonutils.NewString(h.GetName()), "name")
  1370. data.Add(jsonutils.NewString(reason), "message")
  1371. notifyclient.SystemExceptionNotify(context.TODO(), napi.ActionSystemException, napi.TOPIC_RESOURCE_HOST, data)
  1372. }
  1373. }
  1374. host, err := h.updateOrCreateHost(h.HostId)
  1375. if err != nil {
  1376. return nil, errors.Wrap(err, "updateOrCreateHost")
  1377. }
  1378. return host, nil
  1379. }
  1380. func (h *SHostInfo) UpdateSyncInfo(hostId string, body jsonutils.JSONObject) (interface{}, error) {
  1381. if h.GetHostId() != hostId {
  1382. return nil, nil
  1383. }
  1384. descObj, err := body.Get("desc")
  1385. if err != nil {
  1386. return nil, err
  1387. }
  1388. domainId, _ := descObj.GetString("domain_id")
  1389. projectDomain, _ := descObj.GetString("project_domain")
  1390. if len(domainId) != 0 {
  1391. h.Domain_id = domainId
  1392. }
  1393. if len(projectDomain) != 0 {
  1394. h.Project_domain = strings.ReplaceAll(projectDomain, " ", "+")
  1395. }
  1396. return nil, nil
  1397. }
  1398. func (h *SHostInfo) ProbeSyncIsolatedDevices(hostId string, body jsonutils.JSONObject) (interface{}, error) {
  1399. if h.GetHostId() != hostId {
  1400. return nil, nil
  1401. }
  1402. return h.probeSyncIsolatedDevices()
  1403. }
  1404. func (h *SHostInfo) setHostname(name string) {
  1405. h.FullName = name
  1406. err := sysutils.SetHostname(name)
  1407. if err != nil {
  1408. log.Errorf("Fail to set system hostname: %s", err)
  1409. }
  1410. }
  1411. func (h *SHostInfo) fetchHostname() string {
  1412. if len(options.HostOptions.Hostname) > 0 {
  1413. return options.HostOptions.Hostname
  1414. } else {
  1415. hn, err := os.Hostname()
  1416. if err != nil {
  1417. log.Fatalf("fail to get hostname %s", err)
  1418. return ""
  1419. }
  1420. dotIdx := strings.IndexByte(hn, '.')
  1421. if dotIdx >= 0 {
  1422. hn = hn[:dotIdx]
  1423. }
  1424. hn = strings.ToLower(hn)
  1425. if len(hn) == 0 {
  1426. hn = "host"
  1427. }
  1428. masterIp := h.GetMasterIp()
  1429. if len(masterIp) > 0 {
  1430. return hn + "-" + strings.Replace(masterIp, ".", "-", -1)
  1431. } else {
  1432. return hn
  1433. }
  1434. }
  1435. }
  1436. func (h *SHostInfo) getSysInfo() *SSysInfo {
  1437. return h.sysinfo
  1438. }
  1439. func (h *SHostInfo) updateOrCreateHost(hostId string) (*api.HostDetails, error) {
  1440. if len(hostId) == 0 {
  1441. h.isInit = true
  1442. } else {
  1443. h.isInit = false
  1444. }
  1445. masterIp := h.GetMasterIp()
  1446. if len(masterIp) == 0 {
  1447. return nil, errors.Wrap(httperrors.ErrInvalidStatus, "master ip is none")
  1448. }
  1449. input := api.HostCreateInput{}
  1450. if len(hostId) == 0 {
  1451. input.GenerateName = h.fetchHostname()
  1452. }
  1453. input.AccessIp = masterIp
  1454. input.AccessMac = h.GetMasterMac()
  1455. var schema = "http"
  1456. if options.HostOptions.EnableSsl {
  1457. schema = "https"
  1458. }
  1459. if regutils.MatchIP6Addr(masterIp) {
  1460. input.ManagerUri = fmt.Sprintf("%s://[%s]:%d", schema, masterIp, options.HostOptions.Port)
  1461. } else {
  1462. input.ManagerUri = fmt.Sprintf("%s://%s:%d", schema, masterIp, options.HostOptions.Port)
  1463. }
  1464. input.CpuCount = &h.Cpu.cpuInfoProc.Count
  1465. nodeCount := int8(h.Cpu.cpuInfoDmi.Nodes)
  1466. if sysutils.IsHypervisor() {
  1467. nodeCount = 1
  1468. }
  1469. input.NodeCount = &nodeCount
  1470. input.CpuDesc = h.Cpu.cpuInfoProc.Model
  1471. input.CpuMicrocode = h.Cpu.cpuInfoProc.Microcode
  1472. input.CpuArchitecture = h.Cpu.CpuArchitecture
  1473. maxVcpu := int(h.GetKVMMaxCpus())
  1474. input.KvmCapMaxVcpu = &maxVcpu
  1475. if h.Cpu.cpuInfoProc.Freq > 0 {
  1476. input.CpuMhz = &h.Cpu.cpuInfoProc.Freq
  1477. }
  1478. input.CpuCache = fmt.Sprintf("%d", h.Cpu.cpuInfoProc.Cache)
  1479. input.MemSize = fmt.Sprintf("%d", h.GetMemory())
  1480. if len(hostId) == 0 {
  1481. // first time create
  1482. input.MemReserved = fmt.Sprintf("%d", h.GetReservedMemMb())
  1483. }
  1484. if h.IsHugepagesEnabled() {
  1485. pageSizeKb := options.HostOptions.HugepageSizeMb * 1024
  1486. input.PageSizeKB = &pageSizeKb
  1487. } else {
  1488. pageSizeKb := 4
  1489. input.PageSizeKB = &pageSizeKb
  1490. }
  1491. input.StorageDriver = api.DISK_DRIVER_LINUX
  1492. input.StorageType = h.sysinfo.StorageType
  1493. storageSize := storageman.GetManager().GetTotalCapacity()
  1494. input.StorageSize = &storageSize
  1495. // TODO optimize content data struct
  1496. input.SysInfo = jsonutils.Marshal(h.getSysInfo())
  1497. input.SN = h.sysinfo.SN
  1498. input.HostType = options.HostOptions.HostType
  1499. if len(options.HostOptions.Rack) > 0 {
  1500. input.Rack = options.HostOptions.Rack
  1501. }
  1502. if len(options.HostOptions.Slots) > 0 {
  1503. input.Slots = options.HostOptions.Slots
  1504. }
  1505. meta, _ := jsonutils.Marshal(h.getSysInfo()).GetMap()
  1506. input.Metadata = map[string]string{}
  1507. for k, v := range meta {
  1508. val, _ := v.GetString()
  1509. input.Metadata[k] = val
  1510. }
  1511. input.Version = version.GetShortString()
  1512. if !options.HostOptions.DisableLocalVpc {
  1513. input.OvnVersion = ovnutils.MustGetOvnVersion()
  1514. }
  1515. var (
  1516. res jsonutils.JSONObject
  1517. err error
  1518. )
  1519. if !h.isInit {
  1520. res, err = modules.Hosts.Update(h.GetSession(), hostId, jsonutils.Marshal(input))
  1521. if err != nil {
  1522. return nil, errors.Wrapf(err, "update host with input: %s", jsonutils.Marshal(input))
  1523. }
  1524. } else {
  1525. res, err = modules.Hosts.CreateInContext(h.GetSession(), jsonutils.Marshal(input), &modules.Zones, h.ZoneId)
  1526. if err != nil {
  1527. return nil, errors.Wrapf(err, "create host with zone: %q, input: %s", h.ZoneId, jsonutils.Marshal(input))
  1528. }
  1529. }
  1530. hostDetails := api.HostDetails{}
  1531. err = res.Unmarshal(&hostDetails)
  1532. if err != nil {
  1533. return nil, errors.Wrap(err, "unmarshal host details failed")
  1534. }
  1535. return &hostDetails, nil
  1536. }
  1537. func json2HostDetails(res jsonutils.JSONObject) (*api.HostDetails, error) {
  1538. hostDetails := api.HostDetails{}
  1539. err := res.Unmarshal(&hostDetails)
  1540. if err != nil {
  1541. return nil, errors.Wrap(err, "Unmarshal")
  1542. }
  1543. return &hostDetails, nil
  1544. }
  1545. func (h *SHostInfo) updateHostMetadata(hostname string) (*api.HostDetails, error) {
  1546. onK8s, _ := tokens.IsInsideKubernetesCluster()
  1547. meta := api.HostRegisterMetadata{
  1548. OnKubernetes: onK8s,
  1549. Hostname: hostname,
  1550. }
  1551. if len(h.SysError) > 0 {
  1552. meta.SysError = jsonutils.Marshal(h.SysError).String()
  1553. }
  1554. meta.RootPartitionTotalCapacityMB = int64(storageman.GetRootPartTotalCapacity())
  1555. meta.RootPartitionUsedCapacityMB = int64(storageman.GetRootPartUsedCapacity())
  1556. data := meta.JSON(meta)
  1557. res, err := modules.Hosts.SetMetadata(h.GetSession(), h.HostId, data)
  1558. if err != nil {
  1559. return nil, errors.Wrap(err, "SetMetadata")
  1560. }
  1561. return json2HostDetails(res)
  1562. }
  1563. // func (h *SHostInfo) SyncRootPartitionUsedCapacity() error {
  1564. // data := jsonutils.NewDict()
  1565. // data.Set("root_partition_used_capacity_mb", jsonutils.NewInt(int64(storageman.GetRootPartUsedCapacity())))
  1566. // _, err := modules.Hosts.SetMetadata(h.GetSession(), h.HostId, data)
  1567. // return err
  1568. // }
  1569. func (h *SHostInfo) SetOnHostDown(action string) error {
  1570. h.onHostDown = action
  1571. return fileutils2.FilePutContents(path.Join(options.HostOptions.ServersPath, hostconsts.HOST_HEALTH_FILENAME), h.onHostDown, false)
  1572. }
  1573. func (h *SHostInfo) parseReservedCpusInfo(hostInfo *api.HostDetails) error {
  1574. reservedCpusStr := hostInfo.Metadata[api.HOSTMETA_RESERVED_CPUS_INFO]
  1575. if reservedCpusStr != "" {
  1576. reservedCpusJson, err := jsonutils.ParseString(reservedCpusStr)
  1577. if err != nil {
  1578. return errors.Wrap(err, "parse reserved cpus info failed")
  1579. }
  1580. reservedCpusInfo := api.HostReserveCpusInput{}
  1581. err = reservedCpusJson.Unmarshal(&reservedCpusInfo)
  1582. if err != nil {
  1583. return errors.Wrap(err, "unmarshal host reserved cpus info failed")
  1584. }
  1585. h.reservedCpusInfo = &reservedCpusInfo
  1586. }
  1587. h.guestPinnedCpus = hostInfo.GuestPinnedCpus
  1588. return nil
  1589. }
  1590. func (h *SHostInfo) updateHostReservedMem(reserved int) error {
  1591. content := jsonutils.NewDict()
  1592. content.Set("mem_reserved", jsonutils.NewInt(int64(reserved)))
  1593. _, err := modules.Hosts.Update(h.GetSession(), h.HostId, content)
  1594. if err != nil {
  1595. return errors.Wrap(err, "Update mem_reserved")
  1596. }
  1597. return nil
  1598. }
  1599. func (h *SHostInfo) getKubeReservedMemMb() int {
  1600. // reserved for Kubelet
  1601. if h.kubeletConfig != nil {
  1602. memThreshold := h.kubeletConfig.GetEvictionConfig().GetHard().GetMemoryAvailable()
  1603. memBytes, _ := memThreshold.Value.Quantity.AsInt64()
  1604. memMb := int(math.Ceil(float64(memBytes) / 1024 / 1024))
  1605. log.Infof("Kubelet memory threshold subtracted: %dMB", memMb)
  1606. return memMb
  1607. }
  1608. return 0
  1609. }
  1610. func (h *SHostInfo) getOSReservedMemMb() int {
  1611. // reserved memory for OS
  1612. reserved := h.Mem.MemInfo.Total / 10
  1613. if reserved > options.HostOptions.MaxReservedMemory {
  1614. return options.HostOptions.MaxReservedMemory
  1615. }
  1616. if reserved == 0 {
  1617. panic("memory reserve value is 0, need help")
  1618. }
  1619. return reserved
  1620. }
  1621. func (h *SHostInfo) GetReservedMemMb() int {
  1622. if h.IsHugepagesEnabled() {
  1623. hp, _ := h.Mem.GetHugepages()
  1624. return h.GetMemory() - int(hp.BytesMb())
  1625. } else {
  1626. return h.getOSReservedMemMb() + h.getKubeReservedMemMb()
  1627. }
  1628. }
  1629. func (h *SHostInfo) PutHostOnline() error {
  1630. if len(h.SysError) > 0 {
  1631. log.Errorf("Host sys error: %v", h.SysError)
  1632. }
  1633. data := jsonutils.NewDict()
  1634. _, err := modules.Hosts.PerformAction(
  1635. h.GetSession(), h.HostId, api.HOST_ONLINE, data)
  1636. if err != nil {
  1637. logclient.AddSimpleActionLog(h, logclient.ACT_ONLINE, data, hostutils.GetComputeSession(context.Background()).GetToken(), false)
  1638. }
  1639. return err
  1640. }
  1641. func (h *SHostInfo) initHostNetworks(hostInfo *api.HostDetails) error {
  1642. err := h.ensureNicsHostwires(hostInfo)
  1643. if err != nil {
  1644. return errors.Wrap(err, "ensureNicsHostwires")
  1645. }
  1646. err = h.uploadNetworkInfo()
  1647. if err != nil {
  1648. return errors.Wrap(err, "uploadNetworkInfo")
  1649. }
  1650. return nil
  1651. }
  1652. func (h *SHostInfo) ensureNicsHostwires(hostInfo *api.HostDetails) error {
  1653. for _, nicInfo := range hostInfo.NicInfo {
  1654. if len(nicInfo.WireId) == 0 {
  1655. // no wire info, ignore
  1656. continue
  1657. }
  1658. nic := h.getMatchNic(nicInfo.Mac, nicInfo.VlanId)
  1659. if nic != nil {
  1660. if nicInfo.Bandwidth < 1 {
  1661. nicInfo.Bandwidth = 1000
  1662. }
  1663. err := nic.SetWireId(nicInfo.Wire, nicInfo.WireId, int64(nicInfo.Bandwidth))
  1664. if err != nil {
  1665. return errors.Wrap(err, "SetWireId")
  1666. }
  1667. } else {
  1668. log.Warningf("NIC not present %s, %d", nicInfo.Mac, nicInfo.VlanId)
  1669. }
  1670. }
  1671. return nil
  1672. }
  1673. func (h *SHostInfo) isVirtualFunction(nic string) bool {
  1674. physPortName, err := fileutils2.FileGetContents(path.Join("/sys/class/net", nic, "phys_port_name"))
  1675. if err != nil {
  1676. // log.Warningf("failed get nic %s phys_port_name: %s", nic, err)
  1677. return false
  1678. }
  1679. if strings.Contains(physPortName, "vf") {
  1680. log.Infof("nic %s is virtual function", nic)
  1681. return true
  1682. }
  1683. log.Infof("nic %s is not virtual function", nic)
  1684. return false
  1685. }
  1686. func (h *SHostInfo) uploadNetworkInfo() error {
  1687. phyNics, err := sysutils.Nics()
  1688. if err != nil {
  1689. return errors.Wrap(err, "parse physical nics info")
  1690. }
  1691. for i, pnic := range phyNics {
  1692. if h.isVirtualFunction(pnic.Dev) {
  1693. log.Warningf("phyNics %d %#v is a virtual function", i, pnic)
  1694. continue
  1695. }
  1696. nic := h.getMatchNic(pnic.Mac.String(), 1)
  1697. if nic != nil {
  1698. // no need to report managed NIC
  1699. log.Warningf("phyNics %d %#v is managed interface", i, pnic)
  1700. continue
  1701. }
  1702. // only report unmanaged physical NIC
  1703. err := h.doSendPhysicalNicInfo(pnic)
  1704. if err != nil {
  1705. return errors.Wrapf(err, "doSendPhysicalNicInfo %s", pnic.Dev)
  1706. }
  1707. }
  1708. var hostDetails *api.HostDetails
  1709. for _, nic := range h.Nics {
  1710. log.Infof("host nic: %s", jsonutils.Marshal(nic).String())
  1711. if nic.IsHostLocal() {
  1712. continue
  1713. }
  1714. if len(nic.WireId) == 0 {
  1715. // nic info not uploaded yet
  1716. if len(nic.Wire) == 0 {
  1717. // no wire defined, find from region
  1718. kwargs := jsonutils.NewDict()
  1719. if len(nic.Ip) > 0 {
  1720. kwargs.Set("ip", jsonutils.NewString(nic.Ip))
  1721. } else if len(nic.Ip6) > 0 {
  1722. kwargs.Set("ip", jsonutils.NewString(nic.Ip6))
  1723. }
  1724. kwargs.Set("is_classic", jsonutils.JSONTrue)
  1725. kwargs.Set("scope", jsonutils.NewString("system"))
  1726. kwargs.Set("limit", jsonutils.NewInt(0))
  1727. wireInfo, err := hostutils.GetWireOfIp(context.Background(), kwargs)
  1728. if err != nil {
  1729. return errors.Wrapf(err, "GetWireOfIp args: %s", kwargs.String())
  1730. }
  1731. nic.Wire, _ = wireInfo.GetString("name")
  1732. hostDetails, err = h.doUploadNicInfo(nic)
  1733. if err != nil {
  1734. return errors.Wrapf(err, "doUploadNicInfo with ip %s", nic.Inter)
  1735. }
  1736. } else {
  1737. // no ip on interface, wire defined
  1738. hostDetails, err = h.doUploadNicInfo(nic)
  1739. if err != nil {
  1740. return errors.Wrapf(err, "doUploadNicInfo with wire %s", nic.Inter)
  1741. }
  1742. }
  1743. } else {
  1744. // already uploaded, redo add-nic
  1745. hostDetails, err = h.doUploadNicInfo(nic)
  1746. if err != nil {
  1747. return errors.Wrapf(err, "doSyncNicInfo %s", nic.Inter)
  1748. }
  1749. }
  1750. }
  1751. if hostDetails != nil {
  1752. err = h.ensureNicsHostwires(hostDetails)
  1753. if err != nil {
  1754. return errors.Wrap(err, "onGetHostNetworkInfo")
  1755. }
  1756. }
  1757. return nil
  1758. }
  1759. func (h *SHostInfo) doSendPhysicalNicInfo(nic *types.SNicDevInfo) error {
  1760. log.Infof("upload physical nic: %s(%s)", nic.Dev, nic.Mac)
  1761. _, err := h.doUploadNicInfoInternal(nic.Dev, nic.Mac.String(), 1, "", "", "", "", nic.Up != nil && *nic.Up)
  1762. if err != nil {
  1763. return errors.Wrap(err, "doUploadNicInfoInternal")
  1764. }
  1765. return nil
  1766. }
  1767. func (h *SHostInfo) doUploadNicInfo(nic *SNIC) (*api.HostDetails, error) {
  1768. hostDetails, err := h.doUploadNicInfoInternal(nic.Inter, nic.BridgeDev.GetMac(), nic.BridgeDev.GetVlanId(), nic.Wire, nic.Bridge, nic.Ip, nic.Ip6, true)
  1769. if err != nil {
  1770. return nil, errors.Wrap(err, "doUploadNicInfoInternal")
  1771. }
  1772. return hostDetails, nil
  1773. }
  1774. func (h *SHostInfo) doUploadNicInfoInternal(ifname, mac string, vlanId int, wire, bridge, ipaddr, ip6addr string, isUp bool) (*api.HostDetails, error) {
  1775. log.Infof("Upload NIC br:%s if:%s ip:%s ip6:%s", bridge, ifname, ipaddr, ip6addr)
  1776. content := jsonutils.NewDict()
  1777. content.Set("mac", jsonutils.NewString(mac))
  1778. content.Set("vlan_id", jsonutils.NewInt(int64(vlanId)))
  1779. content.Set("wire", jsonutils.NewString(wire))
  1780. content.Set("bridge", jsonutils.NewString(bridge))
  1781. content.Set("interface", jsonutils.NewString(ifname))
  1782. if isUp {
  1783. content.Set("link_up", jsonutils.JSONTrue)
  1784. } else {
  1785. content.Set("link_up", jsonutils.JSONFalse)
  1786. }
  1787. if len(ipaddr) > 0 {
  1788. content.Set("ip_addr", jsonutils.NewString(ipaddr))
  1789. if ipaddr == h.GetMasterIp() {
  1790. content.Set("nic_type", jsonutils.NewString(string(api.NIC_TYPE_ADMIN)))
  1791. }
  1792. // always try to allocate from reserved pool
  1793. content.Set("reserve", jsonutils.JSONTrue)
  1794. }
  1795. if len(ip6addr) > 0 {
  1796. content.Set("ip6_addr", jsonutils.NewString(ip6addr))
  1797. if ip6addr == h.GetMasterIp() {
  1798. content.Set("nic_type", jsonutils.NewString(string(api.NIC_TYPE_ADMIN)))
  1799. }
  1800. // always try to allocate from reserved pool
  1801. content.Set("reserve", jsonutils.JSONTrue)
  1802. }
  1803. res, err := modules.Hosts.PerformAction(h.GetSession(), h.HostId, "add-netif", content)
  1804. if err != nil {
  1805. return nil, errors.Wrapf(err, "modules.Hosts.PerformAction add-netif: %s", content.String())
  1806. }
  1807. return json2HostDetails(res)
  1808. }
  1809. /*func (h *SHostInfo) doSyncNicInfo(nic *SNIC) error {
  1810. content := jsonutils.NewDict()
  1811. content.Set("bridge", jsonutils.NewString(nic.Bridge))
  1812. content.Set("interface", jsonutils.NewString(nic.Inter))
  1813. query := jsonutils.NewDict()
  1814. query.Set("mac_addr", jsonutils.NewString(nic.BridgeDev.GetMac()))
  1815. _, err := modules.Hostwires.Update(h.GetSession(),
  1816. h.HostId, nic.WireId, query, content)
  1817. if err != nil {
  1818. return errors.Wrap(err, "modules.Hostwires.Update")
  1819. }
  1820. return nil
  1821. }*/
  1822. /*func (h *SHostInfo) onUploadNicInfoSucc(nic *SNIC) error {
  1823. res, err := modules.Hostwires.Get(h.GetSession(), h.HostId, nic.Network, nil)
  1824. if err != nil {
  1825. return errors.Wrap(err, "modules.Hostwires.Get")
  1826. } else {
  1827. bridge, _ := res.GetString("bridge")
  1828. iface, _ := res.GetString("interface")
  1829. macAddr, _ := res.GetString("mac_addr")
  1830. nic = h.GetMatchNic(bridge, iface, macAddr)
  1831. if nic != nil {
  1832. wire, _ := res.GetString("wire")
  1833. wireId, _ := res.GetString("wire_id")
  1834. bandwidth, err := res.Int("bandwidth")
  1835. if err != nil {
  1836. bandwidth = 1000
  1837. }
  1838. nic.SetWireId(wire, wireId, bandwidth)
  1839. } else {
  1840. return errors.Error("GetMatchNic failed!!!")
  1841. }
  1842. }
  1843. return nil
  1844. }*/
  1845. func (h *SHostInfo) initStorages() error {
  1846. err := h.initLocalStorageImageManager()
  1847. if err != nil {
  1848. return errors.Wrap(err, "init local storage image ")
  1849. }
  1850. hoststorages, err := h.getStorageInfo()
  1851. if err != nil {
  1852. return errors.Wrap(err, "get storage info")
  1853. }
  1854. h.initStoragesInternal(hoststorages)
  1855. return nil
  1856. }
  1857. func (h *SHostInfo) initLocalStorageImageManager() error {
  1858. localImageCachePath := storageman.GetManager().LocalStorageImagecacheManager.GetPath()
  1859. params := jsonutils.NewDict()
  1860. params.Set("external_id", jsonutils.NewString(h.HostId))
  1861. params.Set("path", jsonutils.NewString(localImageCachePath))
  1862. res, err := modules.Storagecaches.List(h.GetSession(), params)
  1863. if err != nil {
  1864. return errors.Wrap(err, "Storagecaches.List")
  1865. }
  1866. var scid string
  1867. if len(res.Data) == 0 {
  1868. // create local storage cache
  1869. body := jsonutils.NewDict()
  1870. body.Set("name", jsonutils.NewString(fmt.Sprintf(
  1871. "local-%s-%s", h.FullName, time.Now().String())))
  1872. body.Set("path", jsonutils.NewString(localImageCachePath))
  1873. body.Set("external_id", jsonutils.NewString(h.HostId))
  1874. sc, err := modules.Storagecaches.Create(h.GetSession(), body)
  1875. if err != nil {
  1876. return errors.Wrap(err, "Storagecaches.Create")
  1877. }
  1878. scid, _ = sc.GetString("id")
  1879. } else {
  1880. scid, _ = res.Data[0].GetString("id")
  1881. }
  1882. storageman.GetManager().LocalStorageImagecacheManager.SetStoragecacheId(scid)
  1883. return nil
  1884. }
  1885. func (h *SHostInfo) getStorageInfo() ([]jsonutils.JSONObject, error) {
  1886. params := jsonutils.NewDict()
  1887. params.Set("details", jsonutils.JSONTrue)
  1888. params.Set("limit", jsonutils.NewInt(0))
  1889. params.Set("scope", jsonutils.NewString("system"))
  1890. res, err := modules.Hoststorages.ListDescendent(h.GetSession(), h.HostId, params)
  1891. if err != nil {
  1892. return nil, errors.Wrap(err, "Hoststorages.ListDescendent")
  1893. } else {
  1894. return res.Data, nil
  1895. }
  1896. }
  1897. func (h *SHostInfo) initStoragesInternal(hoststorages []jsonutils.JSONObject) {
  1898. var detachStorages = []jsonutils.JSONObject{}
  1899. storageManager := storageman.GetManager()
  1900. for _, hs := range hoststorages {
  1901. storagetype, _ := hs.GetString("storage_type")
  1902. mountPoint, _ := hs.GetString("mount_point")
  1903. storagecacheId, _ := hs.GetString("storagecache_id")
  1904. imagecachePath, _ := hs.GetString("imagecache_path")
  1905. storageId, _ := hs.GetString("storage_id")
  1906. storageName, _ := hs.GetString("storage")
  1907. storageConf, _ := hs.Get("storage_conf")
  1908. log.Infof("Storage %s(%s) mountpoint %s", storageName, storagetype, mountPoint)
  1909. if !utils.IsInStringArray(storagetype, api.STORAGE_LOCAL_TYPES) {
  1910. storage := storageManager.NewSharedStorageInstance(mountPoint, storagetype)
  1911. if storage != nil {
  1912. storage.SetStoragecacheId(storagecacheId)
  1913. if err := storage.SetStorageInfo(storageId, storageName, storageConf); err != nil {
  1914. h.AppendError(err.Error(), "storages", storageId, storageName)
  1915. continue
  1916. }
  1917. if err := storage.Accessible(); err != nil {
  1918. h.AppendError(fmt.Sprintf("check storage accessible failed: %s", err.Error()),
  1919. "storages", storageId, storageName)
  1920. continue
  1921. }
  1922. storageManager.Storages = append(storageManager.Storages, storage)
  1923. storageManager.InitSharedStorageImageCache(
  1924. storagetype, storagecacheId, imagecachePath, storage)
  1925. }
  1926. } else {
  1927. // Storage type local
  1928. storage, _ := storageManager.GetStorageByPath(mountPoint)
  1929. if storage != nil {
  1930. storage.SetStoragecacheId(storagecacheId)
  1931. if IsRootPartition(mountPoint) {
  1932. // update host storage is root partition
  1933. params := jsonutils.NewDict()
  1934. params.Set("is_root_partiton", jsonutils.JSONTrue)
  1935. _, err := modules.Hoststorages.Update(h.GetSession(), h.HostId, storageId, nil, params)
  1936. if err != nil {
  1937. h.AppendError(
  1938. fmt.Sprintf("Request update host storage %s with params %s: %s", storageId, params, err),
  1939. "storages", storageId, storageName)
  1940. }
  1941. }
  1942. if err := storage.SetStorageInfo(storageId, storageName, storageConf); err != nil {
  1943. h.AppendError(
  1944. fmt.Sprintf("Set storage info %s/%s/%s failed: %s", storageId, storageName, storageConf, err),
  1945. "storages", storageId, storageName)
  1946. continue
  1947. }
  1948. if storagetype == api.STORAGE_LVM {
  1949. // lvm set storage image cache info
  1950. storageManager.InitLVMStorageImageCache(storagecacheId, mountPoint, storage)
  1951. }
  1952. } else {
  1953. // XXX hack: storage type baremetal is a converted host,reserve storage
  1954. if storagetype != api.STORAGE_BAREMETAL {
  1955. detachStorages = append(detachStorages, hs)
  1956. }
  1957. }
  1958. }
  1959. }
  1960. if len(detachStorages) > 0 {
  1961. go StartDetachStorages(detachStorages)
  1962. }
  1963. for _, s := range storageman.GetManager().Storages {
  1964. if !s.IsLocal() {
  1965. // only local storage need to do the sync
  1966. continue
  1967. }
  1968. storageId := s.GetId()
  1969. storageName := s.GetStorageName()
  1970. storageConf := s.GetStorageConf()
  1971. if err := s.SetStorageInfo(s.GetId(), s.GetStorageName(), s.GetStorageConf()); err != nil {
  1972. h.AppendError(fmt.Sprintf("Set storage info %s/%s/%s failed: %s", storageId, storageName, storageConf, err.Error()),
  1973. "storages", storageId, storageName)
  1974. continue
  1975. }
  1976. res, err := s.SyncStorageInfo()
  1977. if err != nil {
  1978. h.AppendError(fmt.Sprintf("sync storage %s failed: %s", s.GetStorageName(), err.Error()), "storages", storageId, storageName)
  1979. continue
  1980. }
  1981. {
  1982. err = h.onSyncStorageInfoSucc(s, res)
  1983. if err != nil {
  1984. h.AppendError(err.Error(), "storages", storageId, storageName)
  1985. continue
  1986. }
  1987. }
  1988. }
  1989. }
  1990. func (h *SHostInfo) onSyncStorageInfoSucc(storage storageman.IStorage, storageInfo jsonutils.JSONObject) error {
  1991. log.Infof("storage id %s", storage.GetId())
  1992. if len(storage.GetId()) == 0 {
  1993. log.Errorf("storage config %s", storageInfo)
  1994. id, _ := storageInfo.GetString("id")
  1995. name, _ := storageInfo.GetString("name")
  1996. storageConf, _ := storageInfo.Get("storage_conf")
  1997. if err := storage.SetStorageInfo(id, name, storageConf); err != nil {
  1998. return errors.Wrapf(err, "Set storage info %s/%s/%s failed", id, name, storageConf)
  1999. }
  2000. err := h.attachStorage(storage)
  2001. if err != nil {
  2002. return errors.Wrap(err, "attachStorage")
  2003. }
  2004. }
  2005. return nil
  2006. }
  2007. func (h *SHostInfo) attachStorage(storage storageman.IStorage) error {
  2008. content := jsonutils.NewDict()
  2009. content.Set("mount_point", jsonutils.NewString(storage.GetPath()))
  2010. content.Set("is_root_partition", jsonutils.NewBool(IsRootPartition(storage.GetPath())))
  2011. _, err := modules.Hoststorages.Attach(h.GetSession(), h.HostId, storage.GetId(), content)
  2012. if err != nil {
  2013. return errors.Wrap(err, "Hoststorages.Attach")
  2014. }
  2015. return nil
  2016. }
  2017. func (h *SHostInfo) getRemoteIsolatedDevices() ([]jsonutils.JSONObject, error) {
  2018. params := jsonutils.NewDict()
  2019. params.Set("details", jsonutils.JSONTrue)
  2020. params.Set("limit", jsonutils.NewInt(0))
  2021. params.Set("host", jsonutils.NewString(h.GetHostId()))
  2022. params.Set("scope", jsonutils.NewString("system"))
  2023. res, err := modules.IsolatedDevices.List(h.GetSession(), params)
  2024. if err != nil {
  2025. return nil, err
  2026. }
  2027. return res.Data, nil
  2028. }
  2029. func (h *SHostInfo) initIsolatedDevices() error {
  2030. info, err := h.probeSyncIsolatedDevices()
  2031. if err != nil {
  2032. return errors.Wrap(err, "probeSyncIsolatedDevices")
  2033. }
  2034. log.Infof("probeSyncIsolatedDevices %s", info)
  2035. return nil
  2036. }
  2037. func (h *SHostInfo) getNicsInterfaces(nics []string) ([]isolated_device.HostNic, error) {
  2038. if len(nics) == 0 {
  2039. return nil, nil
  2040. }
  2041. log.Infof("sriov input nics %v", nics)
  2042. res := []isolated_device.HostNic{}
  2043. for i := 0; i < len(nics); i++ {
  2044. found := false
  2045. for j := 0; j < len(h.Nics); j++ {
  2046. if nics[i] == h.Nics[j].Inter {
  2047. if fileutils2.Exists(fmt.Sprintf("/sys/class/net/%s/bonding/slaves", h.Nics[j].Inter)) {
  2048. interStr, err := fileutils2.FileGetContents(fmt.Sprintf("/sys/class/net/%s/bonding/slaves", h.Nics[j].Inter))
  2049. if err != nil {
  2050. return nil, err
  2051. }
  2052. inters := strings.Split(strings.TrimSpace(interStr), " ")
  2053. for _, inter := range inters {
  2054. res = append(res, isolated_device.HostNic{
  2055. Bridge: h.Nics[j].Bridge,
  2056. Interface: inter,
  2057. Wire: h.Nics[j].WireId,
  2058. })
  2059. }
  2060. } else {
  2061. res = append(res, isolated_device.HostNic{
  2062. Bridge: h.Nics[j].Bridge,
  2063. Interface: h.Nics[j].Inter,
  2064. Wire: h.Nics[j].WireId,
  2065. })
  2066. }
  2067. found = true
  2068. }
  2069. }
  2070. if !found {
  2071. res = append(res, isolated_device.HostNic{h.Nics[0].Bridge, nics[i], h.Nics[0].WireId})
  2072. }
  2073. }
  2074. log.Infof("sriov output nics %v", res)
  2075. return res, nil
  2076. }
  2077. func (h *SHostInfo) probeSyncIsolatedDevices() (*jsonutils.JSONArray, error) {
  2078. if !h.IsKvmSupport() && !h.IsContainerHost() {
  2079. // skip probe isolated device on kvm not supported
  2080. log.Errorf("KVM is not supported, skip probe isolated devices")
  2081. return nil, nil
  2082. }
  2083. if h.IsKvmSupport() {
  2084. for _, driver := range []string{"vfio", "vfio_iommu_type1", "vfio-pci"} {
  2085. if out, err := procutils.NewRemoteCommandAsFarAsPossible("modprobe", driver).Output(); err != nil {
  2086. log.Errorf("failed probe driver %s: %s %s", driver, out, err)
  2087. }
  2088. }
  2089. }
  2090. enableDevWhitelist := options.HostOptions.EnableIsolatedDeviceWhitelist
  2091. offloadNics, err := h.getNicsInterfaces(options.HostOptions.OvsOffloadNics)
  2092. if err != nil {
  2093. return nil, err
  2094. }
  2095. sriovNics, err := h.getNicsInterfaces(options.HostOptions.SRIOVNics)
  2096. if err != nil {
  2097. return nil, err
  2098. }
  2099. h.IsolatedDeviceMan.ProbePCIDevices(
  2100. options.HostOptions.DisableGPU,
  2101. options.HostOptions.DisableUSB,
  2102. options.HostOptions.DisableCustomDevice,
  2103. sriovNics, offloadNics,
  2104. options.HostOptions.PTNVMEConfigs,
  2105. options.HostOptions.AMDVgpuPFs,
  2106. options.HostOptions.NVIDIAVgpuPFs,
  2107. options.HostOptions.EnableCudaMPS,
  2108. options.HostOptions.EnableContainerAscendNPU,
  2109. enableDevWhitelist,
  2110. )
  2111. objs, err := h.getRemoteIsolatedDevices()
  2112. if err != nil {
  2113. return nil, errors.Wrap(err, "getRemoteIsolatedDevices")
  2114. }
  2115. // devs need update
  2116. var devsNeedUpdate = map[string]bool{}
  2117. for _, obj := range objs {
  2118. info := isolated_device.CloudDeviceInfo{}
  2119. if err := obj.Unmarshal(&info); err != nil {
  2120. return nil, errors.Wrapf(err, "unmarshal isolated device %s to cloud device info", obj)
  2121. }
  2122. dev := h.IsolatedDeviceMan.GetDeviceByIdent(info.VendorDeviceId, info.Addr, info.MdevId)
  2123. if dev != nil {
  2124. dev.SetDeviceInfo(info)
  2125. devsNeedUpdate[dev.GetCloudId()] = h.IsolatedDeviceMan.CheckDevIsNeedUpdate(dev, &info)
  2126. } else {
  2127. // detach device
  2128. h.IsolatedDeviceMan.AppendDetachedDevice(&info)
  2129. }
  2130. }
  2131. h.IsolatedDeviceMan.StartDetachTask()
  2132. h.IsolatedDeviceMan.BatchCustomProbe()
  2133. // sync each isolated device found
  2134. eg := errgroup.Group{}
  2135. // limits the number of active goroutines in this group to at most
  2136. eg.SetLimit(16)
  2137. mtx := sync.Mutex{}
  2138. updateDevs := jsonutils.NewArray()
  2139. devs := h.IsolatedDeviceMan.GetDevices()
  2140. for i := range devs {
  2141. dev := devs[i]
  2142. eg.Go(func() error {
  2143. needUpdate := false
  2144. if need, ok := devsNeedUpdate[dev.GetCloudId()]; !ok || need {
  2145. needUpdate = true
  2146. }
  2147. if obj, err := isolated_device.SyncDeviceInfo(h.GetSession(), h.HostId, dev, needUpdate); err != nil {
  2148. log.Errorf("Sync deviceInfo %s error: %v", dev.String(), err)
  2149. return errors.Wrapf(err, "Sync device %s", dev.String())
  2150. } else {
  2151. if obj != nil {
  2152. mtx.Lock()
  2153. updateDevs.Add(obj)
  2154. mtx.Unlock()
  2155. }
  2156. return nil
  2157. }
  2158. })
  2159. }
  2160. if err := eg.Wait(); err != nil {
  2161. return nil, err
  2162. }
  2163. return updateDevs, nil
  2164. }
  2165. func (h *SHostInfo) deployAdminAuthorizedKeys() {
  2166. err := fsdriver.DeployAdminAuthorizedKeys(h.GetSession())
  2167. if err != nil {
  2168. h.AppendHostError(fmt.Sprintf("DeployAdminAuthorizedKeys: %s", err))
  2169. }
  2170. }
  2171. func (h *SHostInfo) onSucc() {
  2172. if !h.stopped && !h.isRegistered {
  2173. log.Infof("Host registration process success....")
  2174. if err := h.save(); err != nil {
  2175. panic(err.Error())
  2176. }
  2177. //h.StartPinger()
  2178. // if h.registerCallback != nil {
  2179. // h.registerCallback()
  2180. // }
  2181. h.isRegistered = true
  2182. // Notify caller, host register is success
  2183. // close(h.IsRegistered)
  2184. }
  2185. }
  2186. func (h *SHostInfo) AppendHostError(content string) {
  2187. h.AppendError(content, "hosts", h.HostId, h.GetName())
  2188. }
  2189. func (h *SHostInfo) AppendError(content, errType, id, name string) {
  2190. if errType == "" {
  2191. errType = "hosts"
  2192. id = h.HostId
  2193. name = h.GetName()
  2194. }
  2195. es, ok := h.SysError[errType]
  2196. if !ok {
  2197. h.SysError[errType] = make([]api.HostError, 0)
  2198. }
  2199. h.SysError[errType] = append(es, api.HostError{Type: errType, Id: id, Name: name, Content: content, Time: time.Now()})
  2200. }
  2201. func (h *SHostInfo) RemoveErrorType(errType string) {
  2202. delete(h.SysError, errType)
  2203. }
  2204. func (h *SHostInfo) save() error {
  2205. if h.saved {
  2206. return nil
  2207. } else {
  2208. h.saved = true
  2209. }
  2210. if err := h.registerHostlocalServer(); err != nil {
  2211. return err
  2212. }
  2213. // TODO XXX >>> ???
  2214. // file put content
  2215. if err := h.setupBridges(); err != nil {
  2216. return err
  2217. }
  2218. return nil
  2219. }
  2220. func (h *SHostInfo) setupBridges() error {
  2221. for _, n := range h.Nics {
  2222. if err := n.BridgeDev.WarmupConfig(); err != nil {
  2223. log.Errorln(err)
  2224. return err
  2225. }
  2226. }
  2227. return nil
  2228. }
  2229. func (h *SHostInfo) registerHostlocalServer() error {
  2230. for _, n := range h.Nics {
  2231. mac := h.GetMasterMac()
  2232. if len(mac) == 0 {
  2233. panic("len mac == 0")
  2234. }
  2235. ip := h.GetMasterIp()
  2236. if len(ip) == 0 {
  2237. panic("len ip == 0")
  2238. }
  2239. err := n.BridgeDev.RegisterHostlocalServer(mac, ip)
  2240. if err != nil {
  2241. return err
  2242. }
  2243. }
  2244. return nil
  2245. }
// GetId returns the host id (h.HostId).
func (h *SHostInfo) GetId() string {
	return h.HostId
}
// GetName returns the host name as reported by getHostname.
func (h *SHostInfo) GetName() string {
	return h.getHostname()
}
// Keyword returns the constant string "host".
func (h *SHostInfo) Keyword() string {
	return "host"
}
  2255. func (h *SHostInfo) stop() {
  2256. log.Infof("Host Info stop ...")
  2257. h.unregister()
  2258. for _, nic := range h.Nics {
  2259. nic.ExitCleanup()
  2260. }
  2261. }
  2262. func (h *SHostInfo) unregister() {
  2263. isLog := false
  2264. for {
  2265. input := api.HostOfflineInput{
  2266. Reason: "host stop",
  2267. }
  2268. _, err := modules.Hosts.PerformAction(h.GetSession(), h.HostId, api.HOST_OFFLINE, jsonutils.Marshal(input))
  2269. if err != nil {
  2270. if errors.Cause(err) == httperrors.ErrResourceNotFound {
  2271. log.Errorf("host not found on region, may be removed, exit cleanly")
  2272. break
  2273. }
  2274. if !isLog {
  2275. logclient.AddSimpleActionLog(h, logclient.ACT_OFFLINE, err, hostutils.GetComputeSession(context.Background()).GetToken(), false)
  2276. isLog = true
  2277. }
  2278. time.Sleep(time.Second * 1)
  2279. continue
  2280. }
  2281. break
  2282. }
  2283. h.stopped = true
  2284. }
// OnCatalogChanged rebuilds the ntpd and telegraf service configuration from
// the given service catalog and reloads a service when its stored
// configuration differs from the new one, or when its name does not appear in
// the HOST_SYSTEM_SERVICES_OFF environment variable and it is not active.
func (h *SHostInfo) OnCatalogChanged(catalog mcclient.KeystoneServiceCatalogV3) {
	// TODO: dynamic probe endpoint type
	// svcs is matched by substring against service names below.
	svcs := os.Getenv("HOST_SYSTEM_SERVICES_OFF")
	defaultEndpointType := options.HostOptions.SessionEndpointType
	if len(defaultEndpointType) == 0 {
		defaultEndpointType = identityapi.EndpointInterfacePublic
	}
	s := auth.AdminSession(context.Background(), options.HostOptions.Region, h.Zone, defaultEndpointType)
	// replace session catalog
	s.SetServiceCatalog(catalog)
	// ntpd is only touched when NTP configuration management is enabled.
	if options.HostOptions.ManageNtpConfiguration {
		ntpd := system_service.GetService("ntpd")
		urls, _ := s.GetServiceURLs("ntp", defaultEndpointType, httputils.POST)
		if len(urls) > 0 {
			log.Infof("Get Ntp urls: %v", urls)
		} else {
			// No "ntp" service URLs were found: fall back to a fixed list.
			urls = []string{"ntp://cn.pool.ntp.org",
				"ntp://0.cn.pool.ntp.org",
				"ntp://1.cn.pool.ntp.org",
				"ntp://2.cn.pool.ntp.org",
				"ntp://3.cn.pool.ntp.org"}
		}
		if !reflect.DeepEqual(ntpd.GetConf(), urls) || (!strings.Contains(svcs, "ntpd") && !ntpd.IsActive()) {
			ntpd.SetConf(urls)
			ntpd.BgReload(map[string]interface{}{"servers": urls})
		}
	}
	// Build the telegraf configuration: identity tags, NICs and outputs.
	telegraf := system_service.GetService("telegraf")
	conf := map[string]interface{}{}
	conf["hostname"] = h.getHostname()
	conf["server_path"] = options.HostOptions.ServersPath
	conf["tags"] = map[string]string{
		"id":             h.HostId,
		"host_id":        h.HostId,
		"zone_id":        h.ZoneId,
		"zone":           h.Zone,
		"cloudregion_id": h.CloudregionId,
		"cloudregion":    h.Cloudregion,
		"domain_id":      h.Domain_id,
		"project_domain": h.Project_domain,
		"region":         options.HostOptions.Region,
		"host_ip":        h.GetMasterIp(),
		hostconsts.TELEGRAF_TAG_KEY_BRAND:      hostconsts.TELEGRAF_TAG_ONECLOUD_BRAND,
		hostconsts.TELEGRAF_TAG_KEY_RES_TYPE:   hostconsts.TELEGRAF_TAG_ONECLOUD_RES_TYPE,
		hostconsts.TELEGRAF_TAG_KEY_HOST_TYPE:  hostconsts.TELEGRAF_TAG_ONECLOUD_HOST_TYPE_HOST,
		hostconsts.TELEGRAF_TAG_KEY_HYPERVISOR: options.HostOptions.HostType,
	}
	conf["nics"] = h.getNicsTelegrafConf()
	// Kafka section is added only when "kafka" service URLs are found; the
	// SASL settings are included only when the matching option is non-empty.
	urls, _ := s.GetServiceURLs("kafka", defaultEndpointType, httputils.POST)
	if len(urls) > 0 {
		kafkaConf := map[string]interface{}{
			"brokers": urls,
			"topic":   options.HostOptions.TelegrafKafkaOutputTopic,
		}
		if len(options.HostOptions.TelegrafKafkaOutputSaslUsername) > 0 {
			kafkaConf["sasl_username"] = options.HostOptions.TelegrafKafkaOutputSaslUsername
		}
		if len(options.HostOptions.TelegrafKafkaOutputSaslPassword) > 0 {
			kafkaConf["sasl_password"] = options.HostOptions.TelegrafKafkaOutputSaslPassword
		}
		if len(options.HostOptions.TelegrafKafkaOutputSaslMechanism) > 0 {
			kafkaConf["sasl_mechanism"] = options.HostOptions.TelegrafKafkaOutputSaslMechanism
		}
		conf["kafka"] = kafkaConf
	}
	// Only the first "opentsdb" URL is used.
	urls, _ = s.GetServiceURLs("opentsdb", defaultEndpointType, httputils.POST)
	if len(urls) > 0 {
		conf["opentsdb"] = map[string]interface{}{
			"url": urls[0],
		}
	}
	// Device-specific telegraf inputs are added for container hosts only.
	if h.IsContainerHost() {
		h.injectTelegrafDeviceConfig(conf)
	}
	// Note: the local variable tsdb shadows the tsdb package from here on.
	tsdb, _ := tsdb.GetDefaultServiceSource(s, defaultEndpointType)
	if tsdb != nil && len(tsdb.URLs) > 0 {
		conf[apis.SERVICE_TYPE_INFLUXDB] = map[string]interface{}{
			"url":       tsdb.URLs,
			"database":  "telegraf",
			"tsdb_type": tsdb.Type,
		}
	}
	if !reflect.DeepEqual(telegraf.GetConf(), conf) || (!strings.Contains(svcs, "telegraf") && !telegraf.IsActive()) {
		telegraf.SetConf(conf)
		// BgReload when "telegraf" is not listed in svcs, BgReloadConf otherwise.
		if !strings.Contains(svcs, "telegraf") {
			telegraf.BgReload(conf)
		} else {
			telegraf.BgReloadConf(conf)
		}
	}
	/*urls, _ = catalog.GetServiceURLs("elasticsearch",
		options.HostOptions.Region, "zone", defaultEndpointType)
	if len(urls) > 0 {
		conf["elasticsearch"] = map[string]interface{}{"url": urls[0]}
		fluentbit := system_service.GetService("fluentbit")
		if !reflect.DeepEqual(fluentbit.GetConf(), conf) || !fluentbit.IsActive() {
			fluentbit.SetConf(conf)
			fluentbit.BgReload(conf)
		}
	}*/
}
  2386. func (h *SHostInfo) injectTelegrafDeviceConfig(conf map[string]interface{}) {
  2387. devs := h.GetIsolatedDeviceManager().GetDevices()
  2388. if len(devs) == 0 {
  2389. return
  2390. }
  2391. // group dev
  2392. hasNetint := false
  2393. hasVasmi := false
  2394. hasNvidiasmi := false
  2395. for _, dev := range devs {
  2396. devType := dev.GetDeviceType()
  2397. switch devType {
  2398. case string(isolated_device.ContainerDeviceTypeCphAMDGPU):
  2399. confMap, ok := conf[system_service.TELEGRAF_INPUT_RADEONTOP].(map[string]interface{})
  2400. if !ok {
  2401. conf[system_service.TELEGRAF_INPUT_RADEONTOP] = map[string]interface{}{
  2402. system_service.TELEGRAF_INPUT_CONF_BIN_PATH: "/usr/bin/radeontop",
  2403. system_service.TELEGRAF_INPUT_RADEONTOP_DEV_PATHS: []string{dev.GetDevicePath()},
  2404. }
  2405. } else {
  2406. devPaths := confMap[system_service.TELEGRAF_INPUT_RADEONTOP_DEV_PATHS].([]string)
  2407. if !utils.IsInStringArray(dev.GetDevicePath(), devPaths) {
  2408. devPaths = append(devPaths, dev.GetDevicePath())
  2409. confMap[system_service.TELEGRAF_INPUT_RADEONTOP_DEV_PATHS] = devPaths
  2410. }
  2411. }
  2412. case string(isolated_device.ContainerNetintCAQuadra), string(isolated_device.ContainerNetintCAASIC):
  2413. hasNetint = true
  2414. continue
  2415. case string(isolated_device.ContainerDeviceTypeVastaitechGpu):
  2416. hasVasmi = true
  2417. continue
  2418. case string(isolated_device.ContainerDeviceTypeNvidiaGpu), string(isolated_device.ContainerDeviceTypeNvidiaMps), string(isolated_device.ContainerDeviceTypeNvidiaGpuShare):
  2419. hasNvidiasmi = true
  2420. }
  2421. }
  2422. if hasNetint {
  2423. conf[system_service.TELEGRAF_INPUT_NETDEV] = map[string]interface{}{
  2424. system_service.TELEGRAF_INPUT_CONF_BIN_PATH: "/usr/bin/ni_rsrc_mon",
  2425. }
  2426. }
  2427. if hasVasmi {
  2428. conf[system_service.TELEGRAF_INPUT_VASMI] = map[string]interface{}{
  2429. system_service.TELEGRAF_INPUT_CONF_BIN_PATH: "/usr/bin/vasmi",
  2430. }
  2431. }
  2432. if hasNvidiasmi {
  2433. conf[system_service.TELEGRAF_INPUT_NVIDIASMI] = struct{}{}
  2434. }
  2435. }
  2436. func (h *SHostInfo) getNicsTelegrafConf() []map[string]interface{} {
  2437. var ret = make([]map[string]interface{}, 0)
  2438. existing := make(map[string]struct{})
  2439. for i, n := range h.Nics {
  2440. ret = append(ret, map[string]interface{}{
  2441. "name": n.Inter,
  2442. "alias": fmt.Sprintf("eth%d", i),
  2443. "speed": n.Bandwidth,
  2444. })
  2445. ret = append(ret, map[string]interface{}{
  2446. "name": n.Inter,
  2447. "alias": fmt.Sprintf("br%d", i),
  2448. "speed": n.Bandwidth,
  2449. })
  2450. existing[n.Inter] = struct{}{}
  2451. }
  2452. phyNics, _ := sysutils.Nics()
  2453. for _, pnic := range phyNics {
  2454. if _, ok := existing[pnic.Dev]; !ok {
  2455. ret = append(ret, map[string]interface{}{
  2456. "name": pnic.Dev,
  2457. "speed": pnic.Speed,
  2458. })
  2459. }
  2460. }
  2461. return ret
  2462. }
  2463. func (h *SHostInfo) ReportHostDmesg(entries []api.SKmsgEntry) error {
  2464. data := api.SHostReportDmesgInput{
  2465. Entries: entries,
  2466. }
  2467. _, err := modules.Hosts.PerformAction(h.GetSession(), h.HostId, "report-dmesg", jsonutils.Marshal(data))
  2468. return err
  2469. }
  2470. func (h *SHostInfo) getHostname() string {
  2471. if h.FullName == "" {
  2472. h.FullName = h.fetchHostname()
  2473. }
  2474. return h.FullName
  2475. }
// GetCpuArchitecture returns the CPU architecture string stored in h.Cpu.
func (h *SHostInfo) GetCpuArchitecture() string {
	return h.Cpu.CpuArchitecture
}
// GetKernelVersion returns the kernel version stored in h.sysinfo.
func (h *SHostInfo) GetKernelVersion() string {
	return h.sysinfo.KernelVersion
}
// IsAarch64 reports whether the CPU architecture equals apis.OS_ARCH_AARCH64.
func (h *SHostInfo) IsAarch64() bool {
	return h.GetCpuArchitecture() == apis.OS_ARCH_AARCH64
}
// IsRiscv64 reports whether the CPU architecture equals apis.OS_ARCH_RISCV64.
func (h *SHostInfo) IsRiscv64() bool {
	return h.GetCpuArchitecture() == apis.OS_ARCH_RISCV64
}
// IsX8664 reports whether the CPU architecture equals apis.OS_ARCH_X86_64.
func (h *SHostInfo) IsX8664() bool {
	return h.GetCpuArchitecture() == apis.OS_ARCH_X86_64
}
// GetKubeletConfig returns the stored kubelet configuration. NewHostInfo only
// sets it when kubelet probing is not disabled.
func (h *SHostInfo) GetKubeletConfig() kubelet.KubeletConfig {
	return h.kubeletConfig
}
// GetHostTopology returns the topology stored in h.sysinfo.
func (h *SHostInfo) GetHostTopology() *hostapi.HostTopology {
	return h.sysinfo.Topology
}
  2497. func (h *SHostInfo) GetReservedCpusInfo() (*cpuset.CPUSet, *cpuset.CPUSet) {
  2498. if h.reservedCpusInfo == nil {
  2499. return nil, nil
  2500. }
  2501. cpus, _ := cpuset.Parse(h.reservedCpusInfo.Cpus)
  2502. var guestPinnedCpus *cpuset.CPUSet
  2503. if len(h.guestPinnedCpus) > 0 {
  2504. guestPinnedCpuSet := cpuset.NewCPUSet(h.guestPinnedCpus...)
  2505. guestPinnedCpus = &guestPinnedCpuSet
  2506. }
  2507. return &cpus, guestPinnedCpus
  2508. }
// IsSchedulerNumaAllocateEnabled returns the value of h.enableNumaAllocate.
func (h *SHostInfo) IsSchedulerNumaAllocateEnabled() bool {
	return h.enableNumaAllocate
}
// IsContainerdRuning always returns false in this implementation.
func (h *SHostInfo) IsContainerdRuning() bool {
	return false
}
// IsContainerHost reports whether the configured host type equals
// api.HOST_TYPE_CONTAINER. The EnableContainerRuntime option is no longer
// consulted (see the disabled line below).
func (h *SHostInfo) IsContainerHost() bool {
	//return options.HostOptions.EnableContainerRuntime || options.HostOptions.HostType == api.HOST_TYPE_CONTAINER
	return options.HostOptions.HostType == api.HOST_TYPE_CONTAINER
}
// GetContainerRuntimeEndpoint returns the ContainerRuntimeEndpoint host option.
func (h *SHostInfo) GetContainerRuntimeEndpoint() string {
	return options.HostOptions.ContainerRuntimeEndpoint
}
// CpuCmtBound returns the value of h.cpuCmtBound.
func (h *SHostInfo) CpuCmtBound() float32 {
	return h.cpuCmtBound
}
// MemCmtBound returns the value of h.memCmtBound.
func (h *SHostInfo) MemCmtBound() float32 {
	return h.memCmtBound
}
  2528. func (h *SHostInfo) getProcessesPids(processesPrefix []string) (map[string]string, error) {
  2529. files, err := os.ReadDir("/proc")
  2530. if err != nil {
  2531. return nil, err
  2532. }
  2533. res := map[string]string{}
  2534. re := regexp.MustCompile(`^\d+$`)
  2535. for _, f := range files {
  2536. if re.MatchString(f.Name()) {
  2537. cmdline, err := fileutils2.FileGetContents(path.Join("/proc", f.Name(), "cmdline"))
  2538. if err != nil {
  2539. log.Errorf("failed read proc %s cmdline: %s", f.Name(), err)
  2540. continue
  2541. }
  2542. segs := strings.Split(cmdline, "\x00")
  2543. if utils.IsInStringArray(segs[0], processesPrefix) {
  2544. res[segs[0]] = f.Name()
  2545. // log.Debugf("getProcessesPids append %s %s", segs[0], f.Name())
  2546. }
  2547. }
  2548. }
  2549. return res, nil
  2550. }
  2551. func (h *SHostInfo) startBindReservedCpus(processesPrefix []string) {
  2552. for {
  2553. processPids, err := h.getProcessesPids(processesPrefix)
  2554. if err != nil {
  2555. log.Errorf("getProcessesPids %s", err)
  2556. } else {
  2557. for process, pid := range processPids {
  2558. cgroupName := path.Join(hostconsts.HOST_RESERVED_CPUSET, strings.ReplaceAll(process, "/", "_"))
  2559. task := cgrouputils.NewCGroupCPUSetTask(pid, cgroupName, "", "")
  2560. if !task.Configure() {
  2561. log.Errorf("process failed init reserved cpuset %s %s", process, pid)
  2562. continue
  2563. }
  2564. if !task.CustomConfig(cgrouputils.CPUSET_CLONE_CHILDREN, "1") {
  2565. log.Errorf("process failed set host reserved cpuset clone children %s %s", process, pid)
  2566. continue
  2567. }
  2568. if !task.SetTask() {
  2569. log.Errorf("process %s %s failed set cgroup cpuset", process, pid)
  2570. continue
  2571. }
  2572. }
  2573. }
  2574. time.Sleep(time.Second * 100)
  2575. }
  2576. }
  2577. func (h *SHostInfo) OnGuestLoadingComplete() {
  2578. for _, nic := range h.Nics {
  2579. if nic.dhcpServer6 != nil {
  2580. nic.dhcpServer6.InitRAQueue()
  2581. }
  2582. }
  2583. }
  2584. func NewHostInfo() (*SHostInfo, error) {
  2585. var res = new(SHostInfo)
  2586. res.sysinfo = &SSysInfo{}
  2587. cpu, err := DetectCpuInfo()
  2588. if err != nil {
  2589. return nil, err
  2590. } else {
  2591. res.Cpu = cpu
  2592. }
  2593. if res.IsAarch64() {
  2594. qemutils.UseAarch64()
  2595. } else if res.IsRiscv64() {
  2596. qemutils.UseRiscv64()
  2597. } else if !res.IsX8664() {
  2598. return nil, fmt.Errorf("unsupport cpu architecture %s", cpu.CpuArchitecture)
  2599. }
  2600. log.Infof("CPU Model %s Microcode %s", cpu.cpuInfoProc.Model, cpu.cpuInfoProc.Microcode)
  2601. mem, err := DetectMemoryInfo()
  2602. if err != nil {
  2603. return nil, err
  2604. } else {
  2605. res.Mem = mem
  2606. }
  2607. res.Nics = make([]*SNIC, 0)
  2608. // res.IsRegistered = make(chan struct{})
  2609. res.SysError = map[string][]api.HostError{}
  2610. if !options.HostOptions.DisableProbeKubelet {
  2611. kubeletDir := options.HostOptions.KubeletRunDirectory
  2612. kubeletConfig, err := kubelet.NewKubeletConfigByDirectory(kubeletDir)
  2613. if err != nil {
  2614. return nil, errors.Wrapf(err, "New kubelet config by dir: %s", kubeletDir)
  2615. }
  2616. res.kubeletConfig = kubeletConfig
  2617. log.Infof("Get kubelet container image Fs: %s, eviction config: %s", res.kubeletConfig.GetImageFs(), res.kubeletConfig.GetEvictionConfig())
  2618. }
  2619. return res, nil
  2620. }
  2621. var hostInfo *SHostInfo
  2622. func Instance() *SHostInfo {
  2623. if hostInfo == nil {
  2624. var err error
  2625. hostInfo, err = NewHostInfo()
  2626. if err != nil {
  2627. log.Fatalf("NewHostInfo: %s", err)
  2628. }
  2629. }
  2630. return hostInfo
  2631. }
  2632. func Stop() {
  2633. hostInfo.stop()
  2634. }