vllm.go 24 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784
  1. package llm_container
  2. import (
  3. "context"
  4. "encoding/json"
  5. "fmt"
  6. "net/http"
  7. "net/url"
  8. "os"
  9. "path"
  10. "path/filepath"
  11. "strconv"
  12. "strings"
  13. "time"
  14. "unicode"
  15. "yunion.io/x/log"
  16. "yunion.io/x/pkg/errors"
  17. commonapi "yunion.io/x/onecloud/pkg/apis"
  18. computeapi "yunion.io/x/onecloud/pkg/apis/compute"
  19. api "yunion.io/x/onecloud/pkg/apis/llm"
  20. "yunion.io/x/onecloud/pkg/llm/models"
  21. "yunion.io/x/onecloud/pkg/mcclient"
  22. )
// init registers the vLLM driver with the global LLM container driver
// registry so it can be selected by container type.
func init() {
	models.RegisterLLMContainerDriver(newVLLM())
}
// vllm implements models.ILLMContainerDriver for vLLM-based LLM containers.
// It embeds baseDriver for the behavior shared by all container drivers.
type vllm struct {
	baseDriver
}
// newVLLM builds a vllm driver instance tagged with the vLLM container type.
func newVLLM() models.ILLMContainerDriver {
	return &vllm{baseDriver: newBaseDriver(api.LLM_CONTAINER_VLLM)}
}
  32. // escapeShellSingleQuoted escapes s for use inside a single-quoted shell string (each ' becomes '\”).
  33. func escapeShellSingleQuoted(s string) string {
  34. return strings.ReplaceAll(s, "'", "'\\''")
  35. }
  36. func shellQuoteSingle(s string) string {
  37. return "'" + escapeShellSingleQuoted(s) + "'"
  38. }
// protectedVLLMArgKeys lists serve flags that buildVLLMServeFlags always
// sets itself; user-supplied customized args must not override them.
// Note: "swap-space" is deliberately NOT protected — it may be overridden.
var protectedVLLMArgKeys = map[string]struct{}{
	"model":                {},
	"served-model-name":    {},
	"port":                 {},
	"tensor-parallel-size": {},
}
  45. func validateVLLMArgKey(key string) error {
  46. if key == "" {
  47. return errors.Error("vllm arg key is empty")
  48. }
  49. if strings.HasPrefix(key, "--") {
  50. return errors.Errorf("invalid vllm arg key %q: do not include leading --", key)
  51. }
  52. for _, r := range key {
  53. if unicode.IsLetter(r) || unicode.IsDigit(r) || r == '-' || r == '_' {
  54. continue
  55. }
  56. return errors.Errorf("invalid vllm arg key %q", key)
  57. }
  58. if _, ok := protectedVLLMArgKeys[key]; ok {
  59. return errors.Errorf("vllm arg key %q is protected", key)
  60. }
  61. return nil
  62. }
  63. func normalizeVLLMCustomizedArgs(args []*api.VllmCustomizedArg) ([]*api.VllmCustomizedArg, error) {
  64. if len(args) == 0 {
  65. return nil, nil
  66. }
  67. out := make([]*api.VllmCustomizedArg, 0, len(args))
  68. indexByKey := make(map[string]int, len(args))
  69. for _, arg := range args {
  70. if arg == nil {
  71. continue
  72. }
  73. key := strings.TrimSpace(arg.Key)
  74. if err := validateVLLMArgKey(key); err != nil {
  75. return nil, err
  76. }
  77. next := &api.VllmCustomizedArg{
  78. Key: key,
  79. Value: arg.Value,
  80. }
  81. if idx, ok := indexByKey[key]; ok {
  82. out[idx] = next
  83. continue
  84. }
  85. indexByKey[key] = len(out)
  86. out = append(out, next)
  87. }
  88. if len(out) == 0 {
  89. return nil, nil
  90. }
  91. return out, nil
  92. }
  93. func mergeVLLMCustomizedArgs(base, overrides []*api.VllmCustomizedArg) ([]*api.VllmCustomizedArg, error) {
  94. out := make([]*api.VllmCustomizedArg, 0, len(base)+len(overrides))
  95. indexByKey := make(map[string]int, len(base)+len(overrides))
  96. appendNormalized := func(items []*api.VllmCustomizedArg) error {
  97. normalized, err := normalizeVLLMCustomizedArgs(items)
  98. if err != nil {
  99. return err
  100. }
  101. for _, arg := range normalized {
  102. if idx, ok := indexByKey[arg.Key]; ok {
  103. out[idx] = arg
  104. continue
  105. }
  106. indexByKey[arg.Key] = len(out)
  107. out = append(out, arg)
  108. }
  109. return nil
  110. }
  111. if err := appendNormalized(base); err != nil {
  112. return nil, err
  113. }
  114. if err := appendNormalized(overrides); err != nil {
  115. return nil, err
  116. }
  117. if len(out) == 0 {
  118. return nil, nil
  119. }
  120. return out, nil
  121. }
  122. func buildVLLMServeFlags(modelPath string, tensorParallelSize, defaultSwapSpaceGiB int, effSpec *api.LLMSpecVllm) []string {
  123. modelQuoted := shellQuoteSingle(modelPath)
  124. flags := []string{
  125. fmt.Sprintf("--model %s", modelQuoted),
  126. fmt.Sprintf(`--served-model-name "$(basename %s)"`, modelQuoted),
  127. fmt.Sprintf("--port %d", api.LLM_VLLM_DEFAULT_PORT),
  128. fmt.Sprintf("--tensor-parallel-size %d", tensorParallelSize),
  129. fmt.Sprintf("--swap-space %d", defaultSwapSpaceGiB),
  130. }
  131. if effSpec == nil || len(effSpec.CustomizedArgs) == 0 {
  132. return flags
  133. }
  134. normalizedArgs, err := normalizeVLLMCustomizedArgs(effSpec.CustomizedArgs)
  135. if err != nil {
  136. log.Errorf("normalize vllm customized args: %v", err)
  137. return flags
  138. }
  139. for _, arg := range normalizedArgs {
  140. flagName := "--" + arg.Key
  141. if arg.Key == "swap-space" {
  142. if arg.Value == "" {
  143. flags[4] = flagName
  144. } else {
  145. flags[4] = fmt.Sprintf("%s %s", flagName, shellQuoteSingle(arg.Value))
  146. }
  147. continue
  148. }
  149. if arg.Value == "" {
  150. flags = append(flags, flagName)
  151. continue
  152. }
  153. flags = append(flags, fmt.Sprintf("%s %s", flagName, shellQuoteSingle(arg.Value)))
  154. }
  155. return flags
  156. }
  157. func (v *vllm) GetSpec(sku *models.SLLMSku) interface{} {
  158. if sku == nil || sku.LLMType != string(api.LLM_CONTAINER_VLLM) || sku.LLMSpec == nil || sku.LLMSpec.Vllm == nil {
  159. return nil
  160. }
  161. return sku.LLMSpec.Vllm
  162. }
// GetEffectiveSpec merges the SKU-level vLLM spec with the LLM-level one.
// The LLM's preferred_model (when non-empty) and customized args override
// the SKU's. Returns nil when neither side carries a vLLM spec; otherwise
// the result is a *api.LLMSpecVllm.
func (v *vllm) GetEffectiveSpec(llm *models.SLLM, sku *models.SLLMSku) interface{} {
	var skuSpec *api.LLMSpecVllm
	if s := v.GetSpec(sku); s != nil {
		skuSpec = s.(*api.LLMSpecVllm)
	}
	var llmSpec *api.LLMSpecVllm
	if llm != nil && llm.LLMSpec != nil && llm.LLMSpec.Vllm != nil {
		llmSpec = llm.LLMSpec.Vllm
	}
	if skuSpec == nil && llmSpec == nil {
		return nil
	}
	out := &api.LLMSpecVllm{}
	if skuSpec != nil {
		out.PreferredModel = skuSpec.PreferredModel
		out.CustomizedArgs = skuSpec.CustomizedArgs
	}
	if llmSpec != nil {
		// preferred_model: the LLM level wins only when explicitly set.
		if llmSpec.PreferredModel != "" {
			out.PreferredModel = llmSpec.PreferredModel
		}
	}
	// Phase 1: merging with nil purely normalizes the SKU args. On failure
	// drop them but keep going so the LLM-level args can still apply.
	mergedArgs, err := mergeVLLMCustomizedArgs(out.CustomizedArgs, nil)
	if err != nil {
		log.Errorf("normalize sku vllm customized args: %v", err)
		out.CustomizedArgs = nil
	} else {
		out.CustomizedArgs = mergedArgs
	}
	// Phase 2: layer the LLM-level args on top; on failure keep whatever
	// survived phase 1.
	if llmSpec != nil {
		mergedArgs, err = mergeVLLMCustomizedArgs(out.CustomizedArgs, llmSpec.CustomizedArgs)
		if err != nil {
			log.Errorf("merge vllm customized args: %v", err)
		} else {
			out.CustomizedArgs = mergedArgs
		}
	}
	return out
}
  202. func (v *vllm) ValidateLLMSkuCreateData(ctx context.Context, userCred mcclient.TokenCredential, input *api.LLMSkuCreateInput) (*api.LLMSkuCreateInput, error) {
  203. input, err := v.baseDriver.ValidateLLMSkuCreateData(ctx, userCred, input)
  204. if err != nil {
  205. return nil, err
  206. }
  207. // Reuse ValidateLLMCreateSpec; ensure llm_spec.vllm always exists for vLLM SKU.
  208. spec, err := v.ValidateLLMCreateSpec(ctx, userCred, nil, input.LLMSpec)
  209. if err != nil {
  210. return nil, err
  211. }
  212. if spec == nil {
  213. spec = &api.LLMSpec{Vllm: &api.LLMSpecVllm{}}
  214. } else if spec.Vllm == nil {
  215. spec.Vllm = &api.LLMSpecVllm{}
  216. }
  217. input.LLMSpec = spec
  218. return input, nil
  219. }
  220. func (v *vllm) ValidateLLMSkuUpdateData(ctx context.Context, userCred mcclient.TokenCredential, sku *models.SLLMSku, input *api.LLMSkuUpdateInput) (*api.LLMSkuUpdateInput, error) {
  221. input, err := v.baseDriver.ValidateLLMSkuUpdateData(ctx, userCred, sku, input)
  222. if err != nil {
  223. return nil, err
  224. }
  225. if input.LLMSpec == nil {
  226. return input, nil
  227. }
  228. // Reuse ValidateLLMUpdateSpec by treating current SKU spec as the "current llm spec".
  229. fakeLLM := &models.SLLM{LLMSpec: sku.LLMSpec}
  230. spec, err := v.ValidateLLMUpdateSpec(ctx, userCred, fakeLLM, input.LLMSpec)
  231. if err != nil {
  232. return nil, err
  233. }
  234. input.LLMSpec = spec
  235. if input.LLMSpec != nil && input.LLMSpec.Vllm == nil {
  236. input.LLMSpec.Vllm = &api.LLMSpecVllm{}
  237. }
  238. return input, nil
  239. }
  240. // ValidateLLMCreateSpec implements ILLMContainerDriver. Merges preferred_model from SKU when input's is empty.
  241. func (v *vllm) ValidateLLMCreateSpec(ctx context.Context, userCred mcclient.TokenCredential, sku *models.SLLMSku, input *api.LLMSpec) (*api.LLMSpec, error) {
  242. if input == nil {
  243. return nil, nil
  244. }
  245. if input.Vllm == nil {
  246. input.Vllm = &api.LLMSpecVllm{}
  247. }
  248. preferred := input.Vllm.PreferredModel
  249. if preferred == "" && sku != nil && sku.LLMSpec != nil && sku.LLMSpec.Vllm != nil {
  250. preferred = sku.LLMSpec.Vllm.PreferredModel
  251. }
  252. spec := &api.LLMSpecVllm{}
  253. if sku != nil && sku.LLMSpec != nil && sku.LLMSpec.Vllm != nil {
  254. base := *sku.LLMSpec.Vllm
  255. spec = &base
  256. }
  257. // Apply create overrides
  258. if preferred != "" {
  259. spec.PreferredModel = preferred
  260. }
  261. mergedArgs, err := mergeVLLMCustomizedArgs(spec.CustomizedArgs, input.Vllm.CustomizedArgs)
  262. if err != nil {
  263. return nil, err
  264. }
  265. spec.CustomizedArgs = mergedArgs
  266. return &api.LLMSpec{Vllm: spec}, nil
  267. }
  268. // ValidateLLMUpdateSpec implements ILLMContainerDriver. Merges preferred_model with current LLM spec; only overwrite when non-empty.
  269. func (v *vllm) ValidateLLMUpdateSpec(ctx context.Context, userCred mcclient.TokenCredential, llm *models.SLLM, input *api.LLMSpec) (*api.LLMSpec, error) {
  270. if input == nil || input.Vllm == nil {
  271. return input, nil
  272. }
  273. base := &api.LLMSpecVllm{}
  274. if llm != nil && llm.LLMSpec != nil && llm.LLMSpec.Vllm != nil {
  275. b := *llm.LLMSpec.Vllm
  276. base = &b
  277. }
  278. // preferred_model: only overwrite when non-empty
  279. if input.Vllm.PreferredModel != "" {
  280. base.PreferredModel = input.Vllm.PreferredModel
  281. }
  282. mergedArgs, err := mergeVLLMCustomizedArgs(base.CustomizedArgs, input.Vllm.CustomizedArgs)
  283. if err != nil {
  284. return nil, err
  285. }
  286. base.CustomizedArgs = mergedArgs
  287. return &api.LLMSpec{Vllm: base}, nil
  288. }
// GetContainerSpec builds the pod container create input for a vLLM LLM.
// The container entrypoint only keeps the container alive; vLLM itself is
// started later by StartLLM via exec. GPU devices come either from
// explicitly scheduled isolated devices (by id) or, when none were given,
// from the SKU's device list (by index). The LLM data disk is mounted for
// models plus a dedicated HF cache directory.
func (v *vllm) GetContainerSpec(ctx context.Context, llm *models.SLLM, image *models.SLLMImage, sku *models.SLLMSku, props []string, devices []computeapi.SIsolatedDevice, diskId string) *computeapi.PodContainerCreateInput {
	// Container entrypoint only keeps the container alive; vLLM is started by StartLLM via exec.
	startScript := `mkdir -p ` + api.LLM_VLLM_MODELS_PATH + ` && exec sleep infinity`
	envs := []*commonapi.ContainerKeyValue{
		{
			// HF hub cache location — presumably under the cache volume
			// mounted below; confirm against LLM_VLLM_CACHE_DIR.
			Key:   "HUGGING_FACE_HUB_CACHE",
			Value: api.LLM_VLLM_CACHE_DIR,
		},
		{
			Key:   "HF_ENDPOINT",
			Value: api.LLM_VLLM_HF_ENDPOINT,
		},
		// // Fix Error 803
		// {
		// 	Key:   "LD_LIBRARY_PATH",
		// 	Value: "/lib64:/usr/local/cuda/lib64:/lib/x86_64-linux-gnu:${LD_LIBRARY_PATH}",
		// },
		// // Fix Error 803
		// {
		// 	Key:   "LD_PRELOAD",
		// 	Value: "/lib/libcuda.so.1 /lib/libnvidia-ptxjitcompiler.so.1 /lib/libnvidia-gpucomp.so",
		// },
	}
	spec := computeapi.ContainerSpec{
		ContainerSpec: commonapi.ContainerSpec{
			Image:             image.ToContainerImage(),
			ImageCredentialId: image.CredentialId,
			Command:           []string{"/bin/sh", "-c"},
			Args:              []string{startScript},
			EnableLxcfs:       true,
			AlwaysRestart:     true,
			Envs:              envs,
		},
	}
	// GPU Devices
	if len(devices) == 0 && (sku.Devices != nil && len(*sku.Devices) > 0) {
		for i := range *sku.Devices {
			// Copy the loop index: each device entry needs its own pointer.
			index := i
			spec.Devices = append(spec.Devices, &computeapi.ContainerDevice{
				Type: commonapi.CONTAINER_DEVICE_TYPE_ISOLATED_DEVICE,
				IsolatedDevice: &computeapi.ContainerIsolatedDevice{
					Index: &index,
				},
			})
		}
	} else if len(devices) > 0 {
		for i := range devices {
			spec.Devices = append(spec.Devices, &computeapi.ContainerDevice{
				Type: commonapi.CONTAINER_DEVICE_TYPE_ISOLATED_DEVICE,
				IsolatedDevice: &computeapi.ContainerIsolatedDevice{
					Id: devices[i].Id,
				},
			})
		}
	}
	// Volume Mounts
	diskIndex := 0
	postOverlays, err := llm.GetMountedModelsPostOverlay()
	if err != nil {
		// Best-effort: the container is still created without post-overlays.
		log.Errorf("GetMountedModelsPostOverlay failed %s", err)
	}
	ctrVols := []*commonapi.ContainerVolumeMount{
		{
			Disk: &commonapi.ContainerVolumeMountDisk{
				SubDirectory: api.LLM_VLLM,
				Index:        &diskIndex,
				PostOverlay:  postOverlays,
			},
			Type:        commonapi.CONTAINER_VOLUME_MOUNT_TYPE_DISK,
			MountPath:   api.LLM_VLLM_BASE_PATH,
			ReadOnly:    false,
			Propagation: commonapi.MOUNTPROPAGATION_PROPAGATION_HOST_TO_CONTAINER,
		},
		{
			// Mount cache dir to save HF cache
			Disk: &commonapi.ContainerVolumeMountDisk{
				SubDirectory: "cache",
				Index:        &diskIndex,
			},
			Type:      commonapi.CONTAINER_VOLUME_MOUNT_TYPE_DISK,
			MountPath: "/root/.cache",
			ReadOnly:  false,
		},
	}
	spec.VolumeMounts = append(spec.VolumeMounts, ctrVols...)
	return &computeapi.PodContainerCreateInput{
		ContainerSpec: spec,
	}
}
  378. func (v *vllm) GetContainerSpecs(ctx context.Context, llm *models.SLLM, image *models.SLLMImage, sku *models.SLLMSku, props []string, devices []computeapi.SIsolatedDevice, diskId string) []*computeapi.PodContainerCreateInput {
  379. return []*computeapi.PodContainerCreateInput{
  380. v.GetContainerSpec(ctx, llm, image, sku, props, devices, diskId),
  381. }
  382. }
// GetLLMAccessUrlInfo resolves the reachable URL info for the vLLM HTTP
// endpoint (scheme "http" on the default vLLM port).
func (v *vllm) GetLLMAccessUrlInfo(ctx context.Context, userCred mcclient.TokenCredential, llm *models.SLLM, input *models.LLMAccessInfoInput) (*api.LLMAccessUrlInfo, error) {
	return models.GetLLMAccessUrlInfo(ctx, userCred, llm, input, "http", api.LLM_VLLM_DEFAULT_PORT)
}
  386. func buildVLLMHealthCheckURL(networkType, llmIP, hostAccessIP string, accessInfo *models.SAccessInfo) (string, error) {
  387. if networkType == string(computeapi.NETWORK_TYPE_GUEST) {
  388. if len(llmIP) == 0 {
  389. return "", errors.Error("LLM IP is empty for guest network")
  390. }
  391. return fmt.Sprintf("http://%s:%d/health", llmIP, api.LLM_VLLM_DEFAULT_PORT), nil
  392. }
  393. if len(llmIP) > 0 {
  394. return fmt.Sprintf("http://%s:%d/health", llmIP, api.LLM_VLLM_DEFAULT_PORT), nil
  395. }
  396. if len(hostAccessIP) == 0 {
  397. return "", errors.Error("host access IP is empty")
  398. }
  399. port := api.LLM_VLLM_DEFAULT_PORT
  400. if accessInfo != nil && accessInfo.AccessPort > 0 {
  401. port = accessInfo.AccessPort
  402. }
  403. return fmt.Sprintf("http://%s:%d/health", hostAccessIP, port), nil
  404. }
  405. // resolveModelPath resolves the model directory inside the container.
  406. // It prefers preferredPath when it exists; otherwise it picks the first directory under models path.
  407. // Returns (empty, nil) when no model is found.
  408. func (v *vllm) resolveModelPath(ctx context.Context, containerId string, preferredPath string) (string, error) {
  409. preferredQuoted := shellQuoteSingle(preferredPath)
  410. cmd := fmt.Sprintf(
  411. `mkdir -p %s;
  412. preferred=%s;
  413. if [ -n "$preferred" ] && [ -d "$preferred" ]; then model="$preferred"; else model=$(ls -d %s/* 2>/dev/null | head -n 1); fi;
  414. if [ -z "$model" ]; then echo "NO_MODEL"; exit 0; fi;
  415. printf '%%s\n' "$model"`,
  416. api.LLM_VLLM_MODELS_PATH,
  417. preferredQuoted,
  418. api.LLM_VLLM_MODELS_PATH,
  419. )
  420. out, err := exec(ctx, containerId, cmd, 30)
  421. if err != nil {
  422. return "", errors.Wrap(err, "exec resolve model path")
  423. }
  424. out = strings.TrimSpace(out)
  425. if out == "NO_MODEL" || out == "" {
  426. return "", nil
  427. }
  428. return out, nil
  429. }
// StartLLM starts the vLLM server inside the container via exec, then
// polls its /health endpoint until it answers 200, the timeout elapses,
// or ctx is cancelled. Returns nil without starting anything when no model
// directory can be resolved.
func (v *vllm) StartLLM(ctx context.Context, userCred mcclient.TokenCredential, llm *models.SLLM) error {
	lc, err := llm.GetLLMContainer()
	if err != nil {
		return errors.Wrap(err, "get llm container")
	}
	sku, err := llm.GetLLMSku(llm.LLMSkuId)
	if err != nil {
		return errors.Wrap(err, "get llm sku")
	}
	// One tensor-parallel rank per SKU device (default 1).
	tensorParallelSize := 1
	if sku.Devices != nil && len(*sku.Devices) > 0 {
		tensorParallelSize = len(*sku.Devices)
	}
	// Swap space in GiB: half the SKU memory (sku.Memory presumably in
	// MiB — TODO confirm), with a 1 GiB floor.
	swapSpaceGiB := (sku.Memory * 1) / (2 * 1024)
	if swapSpaceGiB < 1 {
		swapSpaceGiB = 1
	}
	effSpec := (*api.LLMSpecVllm)(nil)
	preferredPath := ""
	if eff := v.GetEffectiveSpec(llm, sku); eff != nil {
		effSpec = eff.(*api.LLMSpecVllm)
		if preferred := effSpec.PreferredModel; preferred != "" {
			preferredPath = path.Join(api.LLM_VLLM_MODELS_PATH, preferred)
		}
	}
	modelPath, err := v.resolveModelPath(ctx, lc.CmpId, preferredPath)
	if err != nil {
		return err
	}
	if modelPath == "" {
		return nil // no model
	}
	// Launch vLLM detached; its output goes to /tmp/vllm.log in-container.
	startCmd := fmt.Sprintf(
		"nohup %s %s > /tmp/vllm.log 2>&1 &",
		api.LLM_VLLM_EXEC_PATH,
		strings.Join(buildVLLMServeFlags(modelPath, tensorParallelSize, swapSpaceGiB, effSpec), " "),
	)
	_, err = exec(ctx, lc.CmpId, startCmd, 30)
	if err != nil {
		log.Errorf("vLLM start failed, exec command: %s", startCmd)
		return errors.Wrapf(err, "exec start vLLM, command: %s", startCmd)
	}
	cmd := startCmd
	// Wait for health endpoint
	input, err := llm.GetLLMAccessInfoInput(ctx, userCred)
	if err != nil {
		return errors.Wrap(err, "get llm url for health check")
	}
	// Prefer the access info whose listen port is the vLLM port; otherwise
	// fall back to the first entry.
	var accessInfo *models.SAccessInfo
	for i := range input.AccessInfos {
		if input.AccessInfos[i].ListenPort == api.LLM_VLLM_DEFAULT_PORT {
			accessInfo = &input.AccessInfos[i]
			break
		}
	}
	if accessInfo == nil && len(input.AccessInfos) > 0 {
		accessInfo = &input.AccessInfos[0]
	}
	healthURL, err := buildVLLMHealthCheckURL(llm.NetworkType, llm.LLMIp, input.HostInternalIp, accessInfo)
	if err != nil {
		return errors.Wrap(err, "build health check url")
	}
	// Poll until 200 OK, deadline, or ctx cancellation. Non-200 responses
	// and transport errors are retried after the configured interval.
	deadline := time.Now().Add(api.LLM_VLLM_HEALTH_CHECK_TIMEOUT)
	for time.Now().Before(deadline) {
		req, err := http.NewRequestWithContext(ctx, http.MethodGet, healthURL, nil)
		if err != nil {
			return errors.Wrap(err, "new health check request")
		}
		resp, err := http.DefaultClient.Do(req)
		if err == nil {
			resp.Body.Close()
			if resp.StatusCode == http.StatusOK {
				return nil
			}
		}
		select {
		case <-ctx.Done():
			return errors.Wrap(ctx.Err(), "context done while waiting for vLLM")
		case <-time.After(api.LLM_VLLM_HEALTH_CHECK_INTERVAL):
			// continue
		}
	}
	// Optionally read last lines of /tmp/vllm.log for better error message
	logTail, _ := exec(ctx, lc.CmpId, "tail -n 20 /tmp/vllm.log 2>/dev/null || true", 5)
	if logTail != "" {
		return errors.Errorf("vLLM health check timeout after %v, exec command: %s, last log: %s", api.LLM_VLLM_HEALTH_CHECK_TIMEOUT, cmd, strings.TrimSpace(logTail))
	}
	return errors.Errorf("vLLM health check timeout after %v, exec command: %s", api.LLM_VLLM_HEALTH_CHECK_TIMEOUT, cmd)
}
// ILLMContainerInstantApp implementation

// GetProbedInstantModelsExt lists locally present model directories inside
// the container by running `du -sk` over the models path. Each directory
// name is reported as one model (tag "latest") with its on-disk size in
// bytes. mdlIds is currently ignored. A failing du (e.g. no models dir
// yet) yields an empty map rather than an error.
func (v *vllm) GetProbedInstantModelsExt(ctx context.Context, userCred mcclient.TokenCredential, llm *models.SLLM, mdlIds ...string) (map[string]api.LLMInternalInstantMdlInfo, error) {
	lc, err := llm.GetLLMContainer()
	if err != nil {
		return nil, errors.Wrap(err, "get llm container")
	}
	// List directories in models path
	cmd := fmt.Sprintf("du -sk %s/*/", api.LLM_VLLM_MODELS_PATH)
	output, err := exec(ctx, lc.CmpId, cmd, 10)
	if err != nil {
		// If ls fails, maybe no directory yet, return empty
		return make(map[string]api.LLMInternalInstantMdlInfo), nil
	}
	modelsMap := make(map[string]api.LLMInternalInstantMdlInfo)
	lines := strings.Split(strings.TrimSpace(output), "\n")
	for _, line := range lines {
		// du output format: "<size-kb>\t<path>/"
		fields := strings.Fields(line)
		if len(fields) < 2 {
			continue
		}
		// Size is in KB; a parse failure deliberately falls back to 0.
		sizeKB, _ := strconv.ParseInt(fields[0], 10, 64)
		fullPath := fields[1]
		name := path.Base(fullPath)
		if name == "" {
			continue
		}
		// We treat the directory name as the model name
		// For vLLM, name usually implies "organization/model" if downloaded from HF, but here we just list local dirs
		modelsMap[name] = api.LLMInternalInstantMdlInfo{
			Name:    name,
			ModelId: name,
			Tag:     "latest",
			Size:    sizeKB * 1024,
		}
	}
	return modelsMap, nil
}
  558. func (v *vllm) DetectModelPaths(ctx context.Context, userCred mcclient.TokenCredential, llm *models.SLLM, pkgInfo api.LLMInternalInstantMdlInfo) ([]string, error) {
  559. lc, err := llm.GetLLMContainer()
  560. if err != nil {
  561. return nil, errors.Wrap(err, "get llm container")
  562. }
  563. modelPath := path.Join(api.LLM_VLLM_MODELS_PATH, pkgInfo.Name)
  564. checkCmd := fmt.Sprintf("[ -d '%s' ] && echo 'EXIST' || echo 'MISSING'", modelPath)
  565. output, err := exec(ctx, lc.CmpId, checkCmd, 10)
  566. if err != nil {
  567. return nil, errors.Wrap(err, "failed to check file existence")
  568. }
  569. if !strings.Contains(output, "EXIST") {
  570. return nil, errors.Errorf("model directory %s missing", modelPath)
  571. }
  572. return []string{modelPath}, nil
  573. }
  574. func (v *vllm) GetImageInternalPathMounts(sApp *models.SInstantModel) map[string]string {
  575. // Map host paths to container paths
  576. // For vLLM simple volume mount, this might be 1:1 or based on the base path
  577. res := make(map[string]string)
  578. for _, mount := range sApp.Mounts {
  579. relPath := strings.TrimPrefix(mount, api.LLM_VLLM_BASE_PATH)
  580. res[relPath] = path.Join(api.LLM_VLLM, relPath)
  581. }
  582. return res
  583. }
  584. func (v *vllm) GetSaveDirectories(sApp *models.SInstantModel) (string, []string, error) {
  585. var filteredMounts []string
  586. for _, mount := range sApp.Mounts {
  587. if strings.HasPrefix(mount, api.LLM_VLLM_BASE_PATH) {
  588. relPath := strings.TrimPrefix(mount, api.LLM_VLLM_BASE_PATH)
  589. filteredMounts = append(filteredMounts, relPath)
  590. }
  591. }
  592. return "", filteredMounts, nil
  593. }
// ValidateMounts accepts the mounts unchanged; vLLM imposes no extra
// constraints on instant-model mount paths.
func (v *vllm) ValidateMounts(mounts []string, mdlName string, mdlTag string) ([]string, error) {
	return mounts, nil
}
// CheckDuplicateMounts returns the user-facing message for duplicate mount
// conflicts; errStr and dupIndex are ignored for vLLM.
func (v *vllm) CheckDuplicateMounts(errStr string, dupIndex int) string {
	return "Duplicate mounts detected"
}
// GetInstantModelIdByPostOverlay always returns "" for vLLM: post-overlays
// are not mapped back to instant model ids by this driver.
func (v *vllm) GetInstantModelIdByPostOverlay(postOverlay *commonapi.ContainerVolumeMountDiskPostOverlay, mdlNameToId map[string]string) string {
	return ""
}
  603. func (v *vllm) GetDirPostOverlay(dir api.LLMMountDirInfo) *commonapi.ContainerVolumeMountDiskPostOverlay {
  604. uid := int64(1000)
  605. gid := int64(1000)
  606. ov := dir.ToOverlay()
  607. ov.FsUser = &uid
  608. ov.FsGroup = &gid
  609. return &ov
  610. }
  611. func (v *vllm) PreInstallModel(ctx context.Context, userCred mcclient.TokenCredential, llm *models.SLLM, instMdl *models.SLLMInstantModel) error {
  612. lc, err := llm.GetLLMContainer()
  613. if err != nil {
  614. return errors.Wrap(err, "get llm container")
  615. }
  616. // Create base directory
  617. cmd := fmt.Sprintf("mkdir -p %s", api.LLM_VLLM_MODELS_PATH)
  618. _, err = exec(ctx, lc.CmpId, cmd, 10)
  619. return err
  620. }
// InstallModel is a no-op for vLLM; model directories are used in place,
// so dirs and mdlIds are ignored.
func (v *vllm) InstallModel(ctx context.Context, userCred mcclient.TokenCredential, llm *models.SLLM, dirs []string, mdlIds []string) error {
	return nil
}
// UninstallModel is intentionally a no-op for vLLM.
func (v *vllm) UninstallModel(ctx context.Context, userCred mcclient.TokenCredential, llm *models.SLLM, instMdl *models.SLLMInstantModel) error {
	// Optionally remove the model directory
	// For safety, we might just log or leave it
	return nil
}
  629. func resolveHfdRevision(modelTag string) string {
  630. if strings.TrimSpace(modelTag) == "" {
  631. return "main"
  632. }
  633. return strings.TrimSpace(modelTag)
  634. }
// hfModelAPIResponse is the subset of the HuggingFace model-info API
// response consumed here: the list of files ("siblings") in the repo.
type hfModelAPIResponse struct {
	Siblings []struct {
		RFilename string `json:"rfilename"`
	} `json:"siblings"`
}
  640. func escapeURLPathPreserveSlash(p string) string {
  641. if p == "" {
  642. return ""
  643. }
  644. parts := strings.Split(p, "/")
  645. for i := range parts {
  646. parts[i] = url.PathEscape(parts[i])
  647. }
  648. return strings.Join(parts, "/")
  649. }
  650. func isNonEmptyFile(p string) bool {
  651. st, err := os.Stat(p)
  652. if err != nil {
  653. return false
  654. }
  655. return !st.IsDir() && st.Size() > 0
  656. }
// DownloadModel downloads a HuggingFace model on the host into tmpDir for
// the instant-model import flow. Files are placed under
// tmpDir/huggingface/<repo-basename> so the archive contains relative
// paths. It returns the model name and the in-container target directory.
// The download is resumable: a non-empty local dir short-circuits, and
// per-file downloads skip files that already exist non-empty.
func (v *vllm) DownloadModel(ctx context.Context, userCred mcclient.TokenCredential, llm *models.SLLM, tmpDir string, modelName string, modelTag string) (string, []string, error) {
	// Download HF model on host into tmpDir for instant-model import.
	// We place files under tmpDir/huggingface/<repo> so that the archive contains relative paths.
	if strings.TrimSpace(tmpDir) == "" {
		return "", nil, errors.Error("tmpDir is empty")
	}
	if strings.TrimSpace(modelName) == "" {
		return "", nil, errors.Error("modelName is empty")
	}
	modelBase := filepath.Base(modelName)
	localDir := filepath.Join(tmpDir, "huggingface", modelBase)
	if err := os.MkdirAll(localDir, 0755); err != nil {
		return "", nil, errors.Wrap(err, "mkdir local model dir")
	}
	// If already downloaded, short-circuit (directory exists and non-empty).
	if entries, err := os.ReadDir(localDir); err == nil && len(entries) > 0 {
		targetDir := path.Join(api.LLM_VLLM_MODELS_PATH, modelBase)
		log.Infof("Model %s already exists in import dir %s", modelName, localDir)
		return modelName, []string{targetDir}, nil
	}
	rev := resolveHfdRevision(modelTag)
	// Ask the HF (mirror) API which files the repo contains at this revision.
	apiURL := fmt.Sprintf("%s/api/models/%s?revision=%s", api.LLM_VLLM_HF_ENDPOINT, escapeURLPathPreserveSlash(modelName), url.QueryEscape(rev))
	log.Infof("Downloading HF model via HF Mirror API: %s", func() string {
		b, _ := json.Marshal(map[string]string{
			"model":    modelName,
			"revision": rev,
			"dir":      localDir,
			"endpoint": api.LLM_VLLM_HF_ENDPOINT,
			"api":      apiURL,
		})
		return string(b)
	}())
	metaBody, err := llm.HttpGet(ctx, apiURL)
	if err != nil {
		return "", nil, errors.Wrapf(err, "fetch hf model metadata failed: %s", apiURL)
	}
	meta := hfModelAPIResponse{}
	if err := json.Unmarshal(metaBody, &meta); err != nil {
		return "", nil, errors.Wrap(err, "unmarshal hf model metadata")
	}
	if len(meta.Siblings) == 0 {
		return "", nil, errors.Errorf("hf model metadata has no siblings: %s", apiURL)
	}
	// Fetch each repo file via the resolve endpoint, skipping files that
	// are already present and non-empty (resume support).
	for _, s := range meta.Siblings {
		rf := strings.TrimSpace(s.RFilename)
		if rf == "" {
			continue
		}
		dst := filepath.Join(localDir, filepath.FromSlash(rf))
		if isNonEmptyFile(dst) {
			continue
		}
		if err := os.MkdirAll(filepath.Dir(dst), 0755); err != nil {
			return "", nil, errors.Wrapf(err, "mkdir for %s", dst)
		}
		fileURL := fmt.Sprintf("%s/%s/resolve/%s/%s", api.LLM_VLLM_HF_ENDPOINT, escapeURLPathPreserveSlash(modelName), url.PathEscape(rev), escapeURLPathPreserveSlash(rf))
		if err := llm.HttpDownloadFile(ctx, fileURL, dst); err != nil {
			return "", nil, errors.Wrapf(err, "download file failed: %s -> %s", fileURL, dst)
		}
	}
	targetDir := path.Join(api.LLM_VLLM_MODELS_PATH, modelBase)
	return modelName, []string{targetDir}, nil
}