api.go 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446
  1. // Copyright 2019 Yunion
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package lbagent
  15. import (
  16. "context"
  17. "fmt"
  18. "strings"
  19. "sync"
  20. "time"
  21. "yunion.io/x/jsonutils"
  22. "yunion.io/x/log"
  23. "yunion.io/x/pkg/errors"
  24. "yunion.io/x/pkg/util/version"
  25. "yunion.io/x/onecloud/pkg/apihelper"
  26. api "yunion.io/x/onecloud/pkg/apis/compute"
  27. computemodels "yunion.io/x/onecloud/pkg/compute/models"
  28. "yunion.io/x/onecloud/pkg/hostman/guestfs/fsdriver"
  29. agentmodels "yunion.io/x/onecloud/pkg/lbagent/models"
  30. agentutils "yunion.io/x/onecloud/pkg/lbagent/utils"
  31. "yunion.io/x/onecloud/pkg/mcclient"
  32. "yunion.io/x/onecloud/pkg/mcclient/auth"
  33. modules "yunion.io/x/onecloud/pkg/mcclient/modules/compute"
  34. options "yunion.io/x/onecloud/pkg/mcclient/options/compute"
  35. "yunion.io/x/onecloud/pkg/util/netutils2"
  36. )
  37. type ApiHelper struct {
  38. opts *Options
  39. lbagentId string
  40. dataDirMan *agentutils.ConfigDirManager
  41. apih *apihelper.APIHelper
  42. corpus *agentmodels.LoadbalancerCorpus
  43. agentParams *agentmodels.AgentParams
  44. haState string
  45. haStateProvider HaStateProvider
  46. mcclientSession *mcclient.ClientSession
  47. ovn *OvnWorker
  48. }
  49. func NewApiHelper(opts *Options, lbagentId string) (*ApiHelper, error) {
  50. corpus := agentmodels.NewEmptyLoadbalancerCorpus()
  51. apiOpts := &apihelper.Options{
  52. CommonOptions: opts.CommonOptions,
  53. SyncIntervalSeconds: opts.ApiSyncIntervalSeconds,
  54. RunDelayMilliseconds: opts.ApiRunDelayMilliseconds,
  55. ListBatchSize: opts.ApiListBatchSize,
  56. }
  57. apih, err := apihelper.NewAPIHelper(apiOpts, corpus.ModelSets)
  58. if err != nil {
  59. return nil, errors.Wrap(err, "new apihelper")
  60. }
  61. helper := &ApiHelper{
  62. opts: opts,
  63. lbagentId: lbagentId,
  64. dataDirMan: agentutils.NewConfigDirManager(opts.apiDataStoreDir),
  65. apih: apih,
  66. corpus: corpus,
  67. haState: api.LB_HA_STATE_UNKNOWN,
  68. }
  69. return helper, nil
  70. }
  71. func (h *ApiHelper) deployAdminAuthorizedKeys(ctx context.Context) {
  72. err := fsdriver.DeployAdminAuthorizedKeys(h.adminClientSession(ctx))
  73. if err != nil {
  74. log.Errorf("DeployAdminAuthorizedKeys %s", err)
  75. }
  76. }
  77. func (h *ApiHelper) Run(ctx context.Context) {
  78. // deploy host admin key
  79. h.deployAdminAuthorizedKeys(ctx)
  80. wg := ctx.Value("wg").(*sync.WaitGroup)
  81. defer func() {
  82. wg.Done()
  83. log.Infof("api helper bye")
  84. }()
  85. h.haState = <-h.haStateProvider.StateChannel()
  86. log.Infof("initial haState: %s", h.haState)
  87. switch h.haState {
  88. case api.LB_HA_STATE_BACKUP:
  89. default:
  90. h.startOvnWorker(ctx)
  91. }
  92. wg.Add(1)
  93. go h.apih.Start(ctx, nil, "")
  94. hbTicker := time.NewTicker(time.Duration(h.opts.ApiLbagentHbInterval) * time.Second)
  95. agentParamsSyncTicker := time.NewTicker(time.Duration(h.opts.ApiSyncIntervalSeconds) * time.Second)
  96. defer hbTicker.Stop()
  97. defer agentParamsSyncTicker.Stop()
  98. for {
  99. select {
  100. case <-hbTicker.C:
  101. _, err := h.doHb(ctx)
  102. if err != nil {
  103. log.Errorf("heartbeat: %s", err)
  104. }
  105. case imss := <-h.apih.ModelSets():
  106. log.Infof("got new data from api helper")
  107. mss := imss.(*agentmodels.ModelSets)
  108. h.corpus.ModelSets = mss
  109. h.doUseCorpus(ctx)
  110. h.agentUpdateSeen(ctx)
  111. err := h.saveCorpus(ctx)
  112. if err != nil {
  113. log.Errorf("save corpus failed: %s", err)
  114. } else {
  115. if err := h.dataDirMan.Prune(h.opts.DataPreserveN); err != nil {
  116. log.Errorf("prune corpus data dir failed: %s", err)
  117. }
  118. }
  119. case <-agentParamsSyncTicker.C:
  120. changed := h.doSyncAgentParams(ctx)
  121. if changed {
  122. log.Infof("agent params changed")
  123. h.doUseCorpus(ctx)
  124. }
  125. case state := <-h.haStateProvider.StateChannel():
  126. log.Infof("current state: %s ha_state: %s", h.haState, state)
  127. switch state {
  128. case api.LB_HA_STATE_BACKUP:
  129. h.stopOvnWorker()
  130. h.doStopDaemons(ctx)
  131. default:
  132. if state != h.haState {
  133. // try your best to make things up
  134. h.startOvnWorker(ctx)
  135. h.doUseCorpus(ctx)
  136. }
  137. }
  138. h.haState = state
  139. case <-ctx.Done():
  140. return
  141. }
  142. }
  143. }
  144. func (h *ApiHelper) SetHaStateProvider(hsp HaStateProvider) {
  145. h.haStateProvider = hsp
  146. }
  147. func (h *ApiHelper) startOvnWorker(ctx context.Context) {
  148. if h.ovn == nil && !h.opts.DisableLocalVpc {
  149. h.ovn = NewOvnWorker(h.opts)
  150. go h.ovn.Start(ctx)
  151. }
  152. }
  153. func (h *ApiHelper) stopOvnWorker() {
  154. if h.ovn != nil {
  155. h.ovn.Stop()
  156. h.ovn = nil
  157. }
  158. }
  159. func (h *ApiHelper) adminClientSession(ctx context.Context) *mcclient.ClientSession {
  160. s := h.mcclientSession
  161. if s != nil {
  162. token := s.GetToken()
  163. expires := token.GetExpires()
  164. if time.Now().Add(time.Hour).After(expires) {
  165. return s
  166. }
  167. }
  168. region := h.opts.CommonOptions.Region
  169. h.mcclientSession = auth.GetAdminSession(ctx, region)
  170. return h.mcclientSession
  171. }
  172. func (h *ApiHelper) agentPeekOnce(ctx context.Context) (*computemodels.SLoadbalancerAgent, error) {
  173. s := h.adminClientSession(ctx)
  174. params := jsonutils.NewDict()
  175. params.Set(api.LBAGENT_QUERY_ORIG_KEY, jsonutils.NewString(api.LBAGENT_QUERY_ORIG_VAL))
  176. data, err := modules.LoadbalancerAgents.Get(s, h.lbagentId, params)
  177. if err != nil {
  178. err := fmt.Errorf("agent get error: %s", err)
  179. return nil, err
  180. }
  181. agent := &computemodels.SLoadbalancerAgent{}
  182. err = data.Unmarshal(agent)
  183. if err != nil {
  184. err := fmt.Errorf("agent data unmarshal error: %s", err)
  185. return nil, err
  186. }
  187. return agent, nil
  188. }
  189. func (h *ApiHelper) agentPeekPeers(ctx context.Context, agent *computemodels.SLoadbalancerAgent) ([]*computemodels.SLoadbalancerAgent, error) {
  190. vri := agent.Params.Vrrp.VirtualRouterId
  191. clusterId := agent.ClusterId
  192. s := h.adminClientSession(ctx)
  193. params := jsonutils.NewDict()
  194. params.Set(api.LBAGENT_QUERY_ORIG_KEY, jsonutils.NewString(api.LBAGENT_QUERY_ORIG_VAL))
  195. params.Set("cluster_id", jsonutils.NewString(clusterId))
  196. listResult, err := modules.LoadbalancerAgents.List(s, params)
  197. if err != nil {
  198. err := fmt.Errorf("agent listing error: %s", err)
  199. return nil, err
  200. }
  201. peers := []*computemodels.SLoadbalancerAgent{}
  202. for _, data := range listResult.Data {
  203. peerAgent := &computemodels.SLoadbalancerAgent{}
  204. err := data.Unmarshal(peerAgent)
  205. if err != nil {
  206. err := fmt.Errorf("agent data unmarshal error: %s", err)
  207. return nil, err
  208. }
  209. // just in case
  210. if peerAgent.ClusterId != clusterId {
  211. continue
  212. }
  213. if peerAgent.Params.Vrrp.VirtualRouterId != vri {
  214. continue
  215. }
  216. peers = append(peers, peerAgent)
  217. }
  218. return peers, nil
  219. }
  220. type agentPeekResult computemodels.SLoadbalancerAgent
  221. func (r *agentPeekResult) staleInFuture(s int) bool {
  222. if r.HbLastSeen.IsZero() {
  223. return true
  224. }
  225. duration := time.Since(r.HbLastSeen).Seconds()
  226. if int(duration) < s {
  227. return true
  228. }
  229. return false
  230. }
  231. /*
  232. func (h *ApiHelper) agentPeek(ctx context.Context) *agentPeekResult {
  233. doPeekWithLog := func() *computemodels.SLoadbalancerAgent {
  234. agent, err := h.agentPeekOnce(ctx)
  235. if err != nil {
  236. log.Errorf("agent peek failed: %s", err)
  237. }
  238. return agent
  239. }
  240. agent := doPeekWithLog()
  241. if agent == nil {
  242. initHbTicker := time.NewTicker(time.Duration(3) * time.Second)
  243. defer initHbTicker.Stop()
  244. initHbDone:
  245. for {
  246. select {
  247. case <-initHbTicker.C:
  248. agent = doPeekWithLog()
  249. if agent != nil {
  250. break initHbDone
  251. }
  252. case <-ctx.Done():
  253. return nil
  254. }
  255. }
  256. }
  257. return (*agentPeekResult)(agent)
  258. }
  259. */
  260. func (h *ApiHelper) agentUpdateSeen(ctx context.Context) *computemodels.SLoadbalancerAgent {
  261. s := h.adminClientSession(ctx)
  262. params := h.corpus.MaxSeenUpdatedAtParams()
  263. data, err := modules.LoadbalancerAgents.Update(s, h.lbagentId, params)
  264. if err != nil {
  265. log.Errorf("agent get error: %s", err)
  266. return nil
  267. }
  268. agent := &computemodels.SLoadbalancerAgent{}
  269. err = data.Unmarshal(agent)
  270. if err != nil {
  271. log.Errorf("agent data unmarshal error: %s", err)
  272. return nil
  273. }
  274. return agent
  275. }
  276. func (h *ApiHelper) newAgentHbParams(ctx context.Context) (*jsonutils.JSONDict, error) {
  277. ip, err := netutils2.MyIPSmart()
  278. if err != nil {
  279. return nil, err
  280. }
  281. state := h.haState
  282. version := version.Get().GitVersion
  283. opts := &options.LoadbalancerAgentActionHbOptions{
  284. IP: ip,
  285. HaState: state,
  286. Version: version,
  287. }
  288. params, err := opts.Params()
  289. if err != nil {
  290. return nil, err
  291. }
  292. return params, nil
  293. }
  294. func (h *ApiHelper) doHb(ctx context.Context) (*computemodels.SLoadbalancerAgent, error) {
  295. // TODO check if things changed recently
  296. s := h.adminClientSession(ctx)
  297. params, err := h.newAgentHbParams(ctx)
  298. if err != nil {
  299. return nil, fmt.Errorf("heartbeat: making params: %s", err)
  300. }
  301. data, err := modules.LoadbalancerAgents.PerformAction(s, h.lbagentId, "hb", params)
  302. if err != nil {
  303. err := fmt.Errorf("heartbeat api error: %s", err)
  304. return nil, err
  305. }
  306. agent := &computemodels.SLoadbalancerAgent{}
  307. err = data.Unmarshal(agent)
  308. if err != nil {
  309. err := fmt.Errorf("heartbeat data unmarshal error: %s", err)
  310. return nil, err
  311. }
  312. return agent, nil
  313. }
  314. func (h *ApiHelper) saveCorpus(ctx context.Context) error {
  315. _, err := h.dataDirMan.NewDir(func(dir string) error {
  316. err := h.corpus.SaveDir(dir)
  317. if err != nil {
  318. return fmt.Errorf("save to dir %s: %s", dir, err)
  319. }
  320. return nil
  321. })
  322. return err
  323. }
  324. func (h *ApiHelper) doSyncAgentParams(ctx context.Context) bool {
  325. agent, err := h.agentPeekOnce(ctx)
  326. if err != nil {
  327. log.Errorf("agent params get failure: %s", err)
  328. return false
  329. }
  330. peers, err := h.agentPeekPeers(ctx, agent)
  331. if err != nil {
  332. log.Errorf("agent get peers failure: %s", err)
  333. return false
  334. }
  335. unicastPeer := []string{}
  336. for _, peer := range peers {
  337. if peer.Id == agent.Id {
  338. continue
  339. }
  340. if peer.IP == "" {
  341. log.Warningf("agent %s(%s) has no ip, use multicast vrrp", peer.Name, peer.Id)
  342. break
  343. }
  344. unicastPeer = append(unicastPeer, peer.IP)
  345. }
  346. useUnicast := len(unicastPeer) == len(peers)-1
  347. agentParams, err := agentmodels.NewAgentParams(agent)
  348. if err != nil {
  349. log.Errorf("agent params prepare failure: %s", err)
  350. return false
  351. }
  352. agentParams.SetVrrpParams("notify_script", h.haStateProvider.StateScript())
  353. if useUnicast {
  354. agentParams.SetVrrpParams("unicast_peer", unicastPeer)
  355. }
  356. if !agentParams.Equals(h.agentParams) {
  357. if useUnicast {
  358. log.Infof("use unicast vrrp from %s to %s", agent.IP, strings.Join(unicastPeer, ","))
  359. }
  360. h.agentParams = agentParams
  361. return true
  362. }
  363. return false
  364. }
  365. func (h *ApiHelper) doUseCorpus(ctx context.Context) {
  366. if h.corpus == nil || h.corpus.ModelSets == nil {
  367. log.Warningf("agent corpus nil")
  368. return
  369. }
  370. if h.agentParams == nil {
  371. log.Warningf("agent params nil")
  372. return
  373. }
  374. if h.ovn != nil {
  375. if err := h.ovn.Refresh(ctx, h.corpus.ModelSets.Loadbalancers); err != nil {
  376. log.Errorf("ovn refresh: %v", err)
  377. }
  378. }
  379. log.Infof("make effect new corpus and params")
  380. cmdData := &LbagentCmdUseCorpusData{
  381. Corpus: h.corpus,
  382. AgentParams: h.agentParams,
  383. }
  384. cmdData.Wg.Add(1)
  385. cmd := &LbagentCmd{
  386. Type: LbagentCmdUseCorpus,
  387. Data: cmdData,
  388. }
  389. cmdChan := ctx.Value("cmdChan").(chan *LbagentCmd)
  390. select {
  391. case cmdChan <- cmd:
  392. cmdData.Wg.Wait()
  393. case <-ctx.Done():
  394. return
  395. }
  396. }
  397. func (h *ApiHelper) doStopDaemons(ctx context.Context) {
  398. cmd := &LbagentCmd{
  399. Type: LbagentCmdStopDaemons,
  400. }
  401. cmdChan := ctx.Value("cmdChan").(chan *LbagentCmd)
  402. select {
  403. case cmdChan <- cmd:
  404. case <-ctx.Done():
  405. }
  406. }