| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784 |
- package llm_container
- import (
- "context"
- "encoding/json"
- "fmt"
- "net/http"
- "net/url"
- "os"
- "path"
- "path/filepath"
- "strconv"
- "strings"
- "time"
- "unicode"
- "yunion.io/x/log"
- "yunion.io/x/pkg/errors"
- commonapi "yunion.io/x/onecloud/pkg/apis"
- computeapi "yunion.io/x/onecloud/pkg/apis/compute"
- api "yunion.io/x/onecloud/pkg/apis/llm"
- "yunion.io/x/onecloud/pkg/llm/models"
- "yunion.io/x/onecloud/pkg/mcclient"
- )
// init registers the vLLM container driver with the LLM driver registry at
// package load time.
func init() {
	models.RegisterLLMContainerDriver(newVLLM())
}
// vllm implements models.ILLMContainerDriver for vLLM-based LLM containers,
// reusing common behavior from baseDriver.
type vllm struct {
	baseDriver
}
// newVLLM builds the vLLM driver instance keyed by api.LLM_CONTAINER_VLLM.
func newVLLM() models.ILLMContainerDriver {
	return &vllm{baseDriver: newBaseDriver(api.LLM_CONTAINER_VLLM)}
}
// escapeShellSingleQuoted escapes s for use inside a single-quoted shell
// string: every embedded ' becomes the sequence '\'' (close quote, escaped
// quote, reopen quote).
func escapeShellSingleQuoted(s string) string {
	pieces := strings.Split(s, "'")
	return strings.Join(pieces, `'\''`)
}
// shellQuoteSingle wraps s in single quotes for safe interpolation into a
// shell command line, escaping any embedded single quotes as '\''.
func shellQuoteSingle(s string) string {
	return "'" + strings.ReplaceAll(s, "'", `'\''`) + "'"
}
// protectedVLLMArgKeys are the serve flags the driver always sets itself in
// buildVLLMServeFlags; validateVLLMArgKey rejects customized args that would
// collide with them. "swap-space" is intentionally absent: it has a driver
// default but may be overridden by a customized arg.
var protectedVLLMArgKeys = map[string]struct{}{
	"model":                {},
	"served-model-name":    {},
	"port":                 {},
	"tensor-parallel-size": {},
}
- func validateVLLMArgKey(key string) error {
- if key == "" {
- return errors.Error("vllm arg key is empty")
- }
- if strings.HasPrefix(key, "--") {
- return errors.Errorf("invalid vllm arg key %q: do not include leading --", key)
- }
- for _, r := range key {
- if unicode.IsLetter(r) || unicode.IsDigit(r) || r == '-' || r == '_' {
- continue
- }
- return errors.Errorf("invalid vllm arg key %q", key)
- }
- if _, ok := protectedVLLMArgKeys[key]; ok {
- return errors.Errorf("vllm arg key %q is protected", key)
- }
- return nil
- }
- func normalizeVLLMCustomizedArgs(args []*api.VllmCustomizedArg) ([]*api.VllmCustomizedArg, error) {
- if len(args) == 0 {
- return nil, nil
- }
- out := make([]*api.VllmCustomizedArg, 0, len(args))
- indexByKey := make(map[string]int, len(args))
- for _, arg := range args {
- if arg == nil {
- continue
- }
- key := strings.TrimSpace(arg.Key)
- if err := validateVLLMArgKey(key); err != nil {
- return nil, err
- }
- next := &api.VllmCustomizedArg{
- Key: key,
- Value: arg.Value,
- }
- if idx, ok := indexByKey[key]; ok {
- out[idx] = next
- continue
- }
- indexByKey[key] = len(out)
- out = append(out, next)
- }
- if len(out) == 0 {
- return nil, nil
- }
- return out, nil
- }
- func mergeVLLMCustomizedArgs(base, overrides []*api.VllmCustomizedArg) ([]*api.VllmCustomizedArg, error) {
- out := make([]*api.VllmCustomizedArg, 0, len(base)+len(overrides))
- indexByKey := make(map[string]int, len(base)+len(overrides))
- appendNormalized := func(items []*api.VllmCustomizedArg) error {
- normalized, err := normalizeVLLMCustomizedArgs(items)
- if err != nil {
- return err
- }
- for _, arg := range normalized {
- if idx, ok := indexByKey[arg.Key]; ok {
- out[idx] = arg
- continue
- }
- indexByKey[arg.Key] = len(out)
- out = append(out, arg)
- }
- return nil
- }
- if err := appendNormalized(base); err != nil {
- return nil, err
- }
- if err := appendNormalized(overrides); err != nil {
- return nil, err
- }
- if len(out) == 0 {
- return nil, nil
- }
- return out, nil
- }
- func buildVLLMServeFlags(modelPath string, tensorParallelSize, defaultSwapSpaceGiB int, effSpec *api.LLMSpecVllm) []string {
- modelQuoted := shellQuoteSingle(modelPath)
- flags := []string{
- fmt.Sprintf("--model %s", modelQuoted),
- fmt.Sprintf(`--served-model-name "$(basename %s)"`, modelQuoted),
- fmt.Sprintf("--port %d", api.LLM_VLLM_DEFAULT_PORT),
- fmt.Sprintf("--tensor-parallel-size %d", tensorParallelSize),
- fmt.Sprintf("--swap-space %d", defaultSwapSpaceGiB),
- }
- if effSpec == nil || len(effSpec.CustomizedArgs) == 0 {
- return flags
- }
- normalizedArgs, err := normalizeVLLMCustomizedArgs(effSpec.CustomizedArgs)
- if err != nil {
- log.Errorf("normalize vllm customized args: %v", err)
- return flags
- }
- for _, arg := range normalizedArgs {
- flagName := "--" + arg.Key
- if arg.Key == "swap-space" {
- if arg.Value == "" {
- flags[4] = flagName
- } else {
- flags[4] = fmt.Sprintf("%s %s", flagName, shellQuoteSingle(arg.Value))
- }
- continue
- }
- if arg.Value == "" {
- flags = append(flags, flagName)
- continue
- }
- flags = append(flags, fmt.Sprintf("%s %s", flagName, shellQuoteSingle(arg.Value)))
- }
- return flags
- }
- func (v *vllm) GetSpec(sku *models.SLLMSku) interface{} {
- if sku == nil || sku.LLMType != string(api.LLM_CONTAINER_VLLM) || sku.LLMSpec == nil || sku.LLMSpec.Vllm == nil {
- return nil
- }
- return sku.LLMSpec.Vllm
- }
// GetEffectiveSpec merges the SKU-level vllm spec with the LLM-level spec:
// the SKU provides the defaults; the LLM's preferred_model (when non-empty)
// and customized args override them. Returns nil when neither side has a
// vllm spec. The returned value is always a *api.LLMSpecVllm.
func (v *vllm) GetEffectiveSpec(llm *models.SLLM, sku *models.SLLMSku) interface{} {
	var skuSpec *api.LLMSpecVllm
	if s := v.GetSpec(sku); s != nil {
		skuSpec = s.(*api.LLMSpecVllm)
	}
	var llmSpec *api.LLMSpecVllm
	if llm != nil && llm.LLMSpec != nil && llm.LLMSpec.Vllm != nil {
		llmSpec = llm.LLMSpec.Vllm
	}
	if skuSpec == nil && llmSpec == nil {
		return nil
	}
	out := &api.LLMSpecVllm{}
	if skuSpec != nil {
		out.PreferredModel = skuSpec.PreferredModel
		out.CustomizedArgs = skuSpec.CustomizedArgs
	}
	if llmSpec != nil {
		// preferred_model: LLM-level value wins only when non-empty.
		if llmSpec.PreferredModel != "" {
			out.PreferredModel = llmSpec.PreferredModel
		}
	}
	// First pass normalizes the SKU args (merging with nil just normalizes);
	// on failure fall back to no customized args rather than propagating an
	// error from this best-effort accessor.
	mergedArgs, err := mergeVLLMCustomizedArgs(out.CustomizedArgs, nil)
	if err != nil {
		log.Errorf("normalize sku vllm customized args: %v", err)
		out.CustomizedArgs = nil
	} else {
		out.CustomizedArgs = mergedArgs
	}
	if llmSpec != nil {
		// Second pass overlays LLM-level args key by key; on failure keep
		// whatever SKU-level args are already in out.
		mergedArgs, err = mergeVLLMCustomizedArgs(out.CustomizedArgs, llmSpec.CustomizedArgs)
		if err != nil {
			log.Errorf("merge vllm customized args: %v", err)
		} else {
			out.CustomizedArgs = mergedArgs
		}
	}
	return out
}
- func (v *vllm) ValidateLLMSkuCreateData(ctx context.Context, userCred mcclient.TokenCredential, input *api.LLMSkuCreateInput) (*api.LLMSkuCreateInput, error) {
- input, err := v.baseDriver.ValidateLLMSkuCreateData(ctx, userCred, input)
- if err != nil {
- return nil, err
- }
- // Reuse ValidateLLMCreateSpec; ensure llm_spec.vllm always exists for vLLM SKU.
- spec, err := v.ValidateLLMCreateSpec(ctx, userCred, nil, input.LLMSpec)
- if err != nil {
- return nil, err
- }
- if spec == nil {
- spec = &api.LLMSpec{Vllm: &api.LLMSpecVllm{}}
- } else if spec.Vllm == nil {
- spec.Vllm = &api.LLMSpecVllm{}
- }
- input.LLMSpec = spec
- return input, nil
- }
- func (v *vllm) ValidateLLMSkuUpdateData(ctx context.Context, userCred mcclient.TokenCredential, sku *models.SLLMSku, input *api.LLMSkuUpdateInput) (*api.LLMSkuUpdateInput, error) {
- input, err := v.baseDriver.ValidateLLMSkuUpdateData(ctx, userCred, sku, input)
- if err != nil {
- return nil, err
- }
- if input.LLMSpec == nil {
- return input, nil
- }
- // Reuse ValidateLLMUpdateSpec by treating current SKU spec as the "current llm spec".
- fakeLLM := &models.SLLM{LLMSpec: sku.LLMSpec}
- spec, err := v.ValidateLLMUpdateSpec(ctx, userCred, fakeLLM, input.LLMSpec)
- if err != nil {
- return nil, err
- }
- input.LLMSpec = spec
- if input.LLMSpec != nil && input.LLMSpec.Vllm == nil {
- input.LLMSpec.Vllm = &api.LLMSpecVllm{}
- }
- return input, nil
- }
- // ValidateLLMCreateSpec implements ILLMContainerDriver. Merges preferred_model from SKU when input's is empty.
- func (v *vllm) ValidateLLMCreateSpec(ctx context.Context, userCred mcclient.TokenCredential, sku *models.SLLMSku, input *api.LLMSpec) (*api.LLMSpec, error) {
- if input == nil {
- return nil, nil
- }
- if input.Vllm == nil {
- input.Vllm = &api.LLMSpecVllm{}
- }
- preferred := input.Vllm.PreferredModel
- if preferred == "" && sku != nil && sku.LLMSpec != nil && sku.LLMSpec.Vllm != nil {
- preferred = sku.LLMSpec.Vllm.PreferredModel
- }
- spec := &api.LLMSpecVllm{}
- if sku != nil && sku.LLMSpec != nil && sku.LLMSpec.Vllm != nil {
- base := *sku.LLMSpec.Vllm
- spec = &base
- }
- // Apply create overrides
- if preferred != "" {
- spec.PreferredModel = preferred
- }
- mergedArgs, err := mergeVLLMCustomizedArgs(spec.CustomizedArgs, input.Vllm.CustomizedArgs)
- if err != nil {
- return nil, err
- }
- spec.CustomizedArgs = mergedArgs
- return &api.LLMSpec{Vllm: spec}, nil
- }
- // ValidateLLMUpdateSpec implements ILLMContainerDriver. Merges preferred_model with current LLM spec; only overwrite when non-empty.
- func (v *vllm) ValidateLLMUpdateSpec(ctx context.Context, userCred mcclient.TokenCredential, llm *models.SLLM, input *api.LLMSpec) (*api.LLMSpec, error) {
- if input == nil || input.Vllm == nil {
- return input, nil
- }
- base := &api.LLMSpecVllm{}
- if llm != nil && llm.LLMSpec != nil && llm.LLMSpec.Vllm != nil {
- b := *llm.LLMSpec.Vllm
- base = &b
- }
- // preferred_model: only overwrite when non-empty
- if input.Vllm.PreferredModel != "" {
- base.PreferredModel = input.Vllm.PreferredModel
- }
- mergedArgs, err := mergeVLLMCustomizedArgs(base.CustomizedArgs, input.Vllm.CustomizedArgs)
- if err != nil {
- return nil, err
- }
- base.CustomizedArgs = mergedArgs
- return &api.LLMSpec{Vllm: base}, nil
- }
// GetContainerSpec builds the pod container create input for a vLLM
// container: a keep-alive entrypoint (vLLM itself is started later by
// StartLLM via exec), HuggingFace cache environment, GPU device assignments,
// and volume mounts for models and the HF cache.
func (v *vllm) GetContainerSpec(ctx context.Context, llm *models.SLLM, image *models.SLLMImage, sku *models.SLLMSku, props []string, devices []computeapi.SIsolatedDevice, diskId string) *computeapi.PodContainerCreateInput {
	// Container entrypoint only keeps the container alive; vLLM is started by StartLLM via exec.
	startScript := `mkdir -p ` + api.LLM_VLLM_MODELS_PATH + ` && exec sleep infinity`
	envs := []*commonapi.ContainerKeyValue{
		{
			Key:   "HUGGING_FACE_HUB_CACHE",
			Value: api.LLM_VLLM_CACHE_DIR,
		},
		{
			Key:   "HF_ENDPOINT",
			Value: api.LLM_VLLM_HF_ENDPOINT,
		},
		// // Fix Error 803
		// {
		// 	Key:   "LD_LIBRARY_PATH",
		// 	Value: "/lib64:/usr/local/cuda/lib64:/lib/x86_64-linux-gnu:${LD_LIBRARY_PATH}",
		// },
		// // Fix Error 803
		// {
		// 	Key:   "LD_PRELOAD",
		// 	Value: "/lib/libcuda.so.1 /lib/libnvidia-ptxjitcompiler.so.1 /lib/libnvidia-gpucomp.so",
		// },
	}
	spec := computeapi.ContainerSpec{
		ContainerSpec: commonapi.ContainerSpec{
			Image:             image.ToContainerImage(),
			ImageCredentialId: image.CredentialId,
			Command:           []string{"/bin/sh", "-c"},
			Args:              []string{startScript},
			EnableLxcfs:       true,
			AlwaysRestart:     true,
			Envs:              envs,
		},
	}
	// GPU devices: when no concrete devices were passed in, request devices
	// by index from the SKU; otherwise bind the given isolated devices by id.
	if len(devices) == 0 && (sku.Devices != nil && len(*sku.Devices) > 0) {
		for i := range *sku.Devices {
			index := i // copy: each device entry needs its own pointer
			spec.Devices = append(spec.Devices, &computeapi.ContainerDevice{
				Type: commonapi.CONTAINER_DEVICE_TYPE_ISOLATED_DEVICE,
				IsolatedDevice: &computeapi.ContainerIsolatedDevice{
					Index: &index,
				},
			})
		}
	} else if len(devices) > 0 {
		for i := range devices {
			spec.Devices = append(spec.Devices, &computeapi.ContainerDevice{
				Type: commonapi.CONTAINER_DEVICE_TYPE_ISOLATED_DEVICE,
				IsolatedDevice: &computeapi.ContainerIsolatedDevice{
					Id: devices[i].Id,
				},
			})
		}
	}
	// Volume mounts: the vLLM base dir (with post-overlays for already
	// mounted models) plus a cache dir backing /root/.cache.
	diskIndex := 0
	postOverlays, err := llm.GetMountedModelsPostOverlay()
	if err != nil {
		// Best-effort: log and continue without post-overlays.
		log.Errorf("GetMountedModelsPostOverlay failed %s", err)
	}
	ctrVols := []*commonapi.ContainerVolumeMount{
		{
			Disk: &commonapi.ContainerVolumeMountDisk{
				SubDirectory: api.LLM_VLLM,
				Index:        &diskIndex,
				PostOverlay:  postOverlays,
			},
			Type:        commonapi.CONTAINER_VOLUME_MOUNT_TYPE_DISK,
			MountPath:   api.LLM_VLLM_BASE_PATH,
			ReadOnly:    false,
			Propagation: commonapi.MOUNTPROPAGATION_PROPAGATION_HOST_TO_CONTAINER,
		},
		{
			// Mount cache dir to save HF cache.
			Disk: &commonapi.ContainerVolumeMountDisk{
				SubDirectory: "cache",
				Index:        &diskIndex,
			},
			Type:      commonapi.CONTAINER_VOLUME_MOUNT_TYPE_DISK,
			MountPath: "/root/.cache",
			ReadOnly:  false,
		},
	}
	spec.VolumeMounts = append(spec.VolumeMounts, ctrVols...)
	return &computeapi.PodContainerCreateInput{
		ContainerSpec: spec,
	}
}
- func (v *vllm) GetContainerSpecs(ctx context.Context, llm *models.SLLM, image *models.SLLMImage, sku *models.SLLMSku, props []string, devices []computeapi.SIsolatedDevice, diskId string) []*computeapi.PodContainerCreateInput {
- return []*computeapi.PodContainerCreateInput{
- v.GetContainerSpec(ctx, llm, image, sku, props, devices, diskId),
- }
- }
// GetLLMAccessUrlInfo delegates to the shared models helper using the HTTP
// scheme on the default vLLM port.
func (v *vllm) GetLLMAccessUrlInfo(ctx context.Context, userCred mcclient.TokenCredential, llm *models.SLLM, input *models.LLMAccessInfoInput) (*api.LLMAccessUrlInfo, error) {
	return models.GetLLMAccessUrlInfo(ctx, userCred, llm, input, "http", api.LLM_VLLM_DEFAULT_PORT)
}
- func buildVLLMHealthCheckURL(networkType, llmIP, hostAccessIP string, accessInfo *models.SAccessInfo) (string, error) {
- if networkType == string(computeapi.NETWORK_TYPE_GUEST) {
- if len(llmIP) == 0 {
- return "", errors.Error("LLM IP is empty for guest network")
- }
- return fmt.Sprintf("http://%s:%d/health", llmIP, api.LLM_VLLM_DEFAULT_PORT), nil
- }
- if len(llmIP) > 0 {
- return fmt.Sprintf("http://%s:%d/health", llmIP, api.LLM_VLLM_DEFAULT_PORT), nil
- }
- if len(hostAccessIP) == 0 {
- return "", errors.Error("host access IP is empty")
- }
- port := api.LLM_VLLM_DEFAULT_PORT
- if accessInfo != nil && accessInfo.AccessPort > 0 {
- port = accessInfo.AccessPort
- }
- return fmt.Sprintf("http://%s:%d/health", hostAccessIP, port), nil
- }
// resolveModelPath resolves the model directory inside the container.
// It prefers preferredPath when that directory exists; otherwise it picks the
// first directory under the models path. Returns ("", nil) when no model is
// found at all.
func (v *vllm) resolveModelPath(ctx context.Context, containerId string, preferredPath string) (string, error) {
	// Quote the preferred path so it survives shell interpolation.
	preferredQuoted := shellQuoteSingle(preferredPath)
	// The script prints either the resolved model directory or the sentinel
	// NO_MODEL when nothing is present yet.
	cmd := fmt.Sprintf(
		`mkdir -p %s;
preferred=%s;
if [ -n "$preferred" ] && [ -d "$preferred" ]; then model="$preferred"; else model=$(ls -d %s/* 2>/dev/null | head -n 1); fi;
if [ -z "$model" ]; then echo "NO_MODEL"; exit 0; fi;
printf '%%s\n' "$model"`,
		api.LLM_VLLM_MODELS_PATH,
		preferredQuoted,
		api.LLM_VLLM_MODELS_PATH,
	)
	out, err := exec(ctx, containerId, cmd, 30)
	if err != nil {
		return "", errors.Wrap(err, "exec resolve model path")
	}
	out = strings.TrimSpace(out)
	if out == "NO_MODEL" || out == "" {
		return "", nil
	}
	return out, nil
}
// StartLLM starts the vLLM server inside the container via exec, then polls
// the /health endpoint until it reports ready, the timeout elapses, or ctx is
// cancelled. Returns nil without starting anything when no model directory
// exists yet.
func (v *vllm) StartLLM(ctx context.Context, userCred mcclient.TokenCredential, llm *models.SLLM) error {
	lc, err := llm.GetLLMContainer()
	if err != nil {
		return errors.Wrap(err, "get llm container")
	}
	sku, err := llm.GetLLMSku(llm.LLMSkuId)
	if err != nil {
		return errors.Wrap(err, "get llm sku")
	}
	// One tensor-parallel rank per GPU device assigned by the SKU; default 1.
	tensorParallelSize := 1
	if sku.Devices != nil && len(*sku.Devices) > 0 {
		tensorParallelSize = len(*sku.Devices)
	}
	// Swap space in GiB derived from SKU memory — presumably MB, making this
	// half the memory (TODO confirm the unit); minimum 1 GiB. The "* 1"
	// factor is a no-op kept from the original formula.
	swapSpaceGiB := (sku.Memory * 1) / (2 * 1024)
	if swapSpaceGiB < 1 {
		swapSpaceGiB = 1
	}
	effSpec := (*api.LLMSpecVllm)(nil)
	preferredPath := ""
	if eff := v.GetEffectiveSpec(llm, sku); eff != nil {
		effSpec = eff.(*api.LLMSpecVllm)
		if preferred := effSpec.PreferredModel; preferred != "" {
			preferredPath = path.Join(api.LLM_VLLM_MODELS_PATH, preferred)
		}
	}
	modelPath, err := v.resolveModelPath(ctx, lc.CmpId, preferredPath)
	if err != nil {
		return err
	}
	if modelPath == "" {
		return nil // no model present; nothing to start
	}
	// Launch vLLM detached inside the container; stdout/stderr go to
	// /tmp/vllm.log for later diagnosis.
	startCmd := fmt.Sprintf(
		"nohup %s %s > /tmp/vllm.log 2>&1 &",
		api.LLM_VLLM_EXEC_PATH,
		strings.Join(buildVLLMServeFlags(modelPath, tensorParallelSize, swapSpaceGiB, effSpec), " "),
	)
	_, err = exec(ctx, lc.CmpId, startCmd, 30)
	if err != nil {
		log.Errorf("vLLM start failed, exec command: %s", startCmd)
		return errors.Wrapf(err, "exec start vLLM, command: %s", startCmd)
	}
	cmd := startCmd
	// Wait for health endpoint.
	input, err := llm.GetLLMAccessInfoInput(ctx, userCred)
	if err != nil {
		return errors.Wrap(err, "get llm url for health check")
	}
	// Prefer the access info whose listen port matches the vLLM port; fall
	// back to the first entry when none matches.
	var accessInfo *models.SAccessInfo
	for i := range input.AccessInfos {
		if input.AccessInfos[i].ListenPort == api.LLM_VLLM_DEFAULT_PORT {
			accessInfo = &input.AccessInfos[i]
			break
		}
	}
	if accessInfo == nil && len(input.AccessInfos) > 0 {
		accessInfo = &input.AccessInfos[0]
	}
	healthURL, err := buildVLLMHealthCheckURL(llm.NetworkType, llm.LLMIp, input.HostInternalIp, accessInfo)
	if err != nil {
		return errors.Wrap(err, "build health check url")
	}
	// Poll /health until 200 OK, timeout, or context cancellation.
	deadline := time.Now().Add(api.LLM_VLLM_HEALTH_CHECK_TIMEOUT)
	for time.Now().Before(deadline) {
		req, err := http.NewRequestWithContext(ctx, http.MethodGet, healthURL, nil)
		if err != nil {
			return errors.Wrap(err, "new health check request")
		}
		resp, err := http.DefaultClient.Do(req)
		if err == nil {
			// NOTE(review): the body is closed without draining; harmless for
			// a tiny health response but prevents connection reuse.
			resp.Body.Close()
			if resp.StatusCode == http.StatusOK {
				return nil
			}
		}
		select {
		case <-ctx.Done():
			return errors.Wrap(ctx.Err(), "context done while waiting for vLLM")
		case <-time.After(api.LLM_VLLM_HEALTH_CHECK_INTERVAL):
			// continue polling
		}
	}
	// Optionally read last lines of /tmp/vllm.log for better error message.
	logTail, _ := exec(ctx, lc.CmpId, "tail -n 20 /tmp/vllm.log 2>/dev/null || true", 5)
	if logTail != "" {
		return errors.Errorf("vLLM health check timeout after %v, exec command: %s, last log: %s", api.LLM_VLLM_HEALTH_CHECK_TIMEOUT, cmd, strings.TrimSpace(logTail))
	}
	return errors.Errorf("vLLM health check timeout after %v, exec command: %s", api.LLM_VLLM_HEALTH_CHECK_TIMEOUT, cmd)
}
// ILLMContainerInstantApp implementation

// GetProbedInstantModelsExt lists the model directories present under the
// container's models path and reports each as an instant-model entry keyed
// by directory name. The mdlIds parameter is not used for filtering here.
func (v *vllm) GetProbedInstantModelsExt(ctx context.Context, userCred mcclient.TokenCredential, llm *models.SLLM, mdlIds ...string) (map[string]api.LLMInternalInstantMdlInfo, error) {
	lc, err := llm.GetLLMContainer()
	if err != nil {
		return nil, errors.Wrap(err, "get llm container")
	}
	// du -sk prints "<size-in-KB>\t<dir>/" for each model directory.
	cmd := fmt.Sprintf("du -sk %s/*/", api.LLM_VLLM_MODELS_PATH)
	output, err := exec(ctx, lc.CmpId, cmd, 10)
	if err != nil {
		// If du fails, there may simply be no directory yet; return empty.
		return make(map[string]api.LLMInternalInstantMdlInfo), nil
	}
	modelsMap := make(map[string]api.LLMInternalInstantMdlInfo)
	lines := strings.Split(strings.TrimSpace(output), "\n")
	for _, line := range lines {
		fields := strings.Fields(line)
		if len(fields) < 2 {
			continue
		}
		// Size is in KB; a parse failure deliberately leaves sizeKB at 0.
		sizeKB, _ := strconv.ParseInt(fields[0], 10, 64)
		fullPath := fields[1]
		name := path.Base(fullPath)
		if name == "" {
			continue
		}
		// We treat the directory name as the model name.
		// For vLLM, name usually implies "organization/model" if downloaded
		// from HF, but here we just list local dirs.
		modelsMap[name] = api.LLMInternalInstantMdlInfo{
			Name:    name,
			ModelId: name,
			Tag:     "latest",
			Size:    sizeKB * 1024,
		}
	}
	return modelsMap, nil
}
- func (v *vllm) DetectModelPaths(ctx context.Context, userCred mcclient.TokenCredential, llm *models.SLLM, pkgInfo api.LLMInternalInstantMdlInfo) ([]string, error) {
- lc, err := llm.GetLLMContainer()
- if err != nil {
- return nil, errors.Wrap(err, "get llm container")
- }
- modelPath := path.Join(api.LLM_VLLM_MODELS_PATH, pkgInfo.Name)
- checkCmd := fmt.Sprintf("[ -d '%s' ] && echo 'EXIST' || echo 'MISSING'", modelPath)
- output, err := exec(ctx, lc.CmpId, checkCmd, 10)
- if err != nil {
- return nil, errors.Wrap(err, "failed to check file existence")
- }
- if !strings.Contains(output, "EXIST") {
- return nil, errors.Errorf("model directory %s missing", modelPath)
- }
- return []string{modelPath}, nil
- }
- func (v *vllm) GetImageInternalPathMounts(sApp *models.SInstantModel) map[string]string {
- // Map host paths to container paths
- // For vLLM simple volume mount, this might be 1:1 or based on the base path
- res := make(map[string]string)
- for _, mount := range sApp.Mounts {
- relPath := strings.TrimPrefix(mount, api.LLM_VLLM_BASE_PATH)
- res[relPath] = path.Join(api.LLM_VLLM, relPath)
- }
- return res
- }
- func (v *vllm) GetSaveDirectories(sApp *models.SInstantModel) (string, []string, error) {
- var filteredMounts []string
- for _, mount := range sApp.Mounts {
- if strings.HasPrefix(mount, api.LLM_VLLM_BASE_PATH) {
- relPath := strings.TrimPrefix(mount, api.LLM_VLLM_BASE_PATH)
- filteredMounts = append(filteredMounts, relPath)
- }
- }
- return "", filteredMounts, nil
- }
// ValidateMounts accepts all mounts unchanged; vLLM imposes no per-mount
// constraints on model name or tag.
func (v *vllm) ValidateMounts(mounts []string, mdlName string, mdlTag string) ([]string, error) {
	return mounts, nil
}
// CheckDuplicateMounts returns the user-facing message for a duplicate
// mount; the raw error string and duplicate index are not used for vLLM.
func (v *vllm) CheckDuplicateMounts(errStr string, dupIndex int) string {
	return "Duplicate mounts detected"
}
// GetInstantModelIdByPostOverlay always returns "": vLLM post-overlays do
// not encode an instant-model id.
func (v *vllm) GetInstantModelIdByPostOverlay(postOverlay *commonapi.ContainerVolumeMountDiskPostOverlay, mdlNameToId map[string]string) string {
	return ""
}
// GetDirPostOverlay converts dir into a disk post-overlay with file
// ownership forced to uid/gid 1000:1000 — presumably the user the vLLM
// container runs as (TODO confirm against the image).
func (v *vllm) GetDirPostOverlay(dir api.LLMMountDirInfo) *commonapi.ContainerVolumeMountDiskPostOverlay {
	uid := int64(1000)
	gid := int64(1000)
	ov := dir.ToOverlay()
	ov.FsUser = &uid
	ov.FsGroup = &gid
	return &ov
}
- func (v *vllm) PreInstallModel(ctx context.Context, userCred mcclient.TokenCredential, llm *models.SLLM, instMdl *models.SLLMInstantModel) error {
- lc, err := llm.GetLLMContainer()
- if err != nil {
- return errors.Wrap(err, "get llm container")
- }
- // Create base directory
- cmd := fmt.Sprintf("mkdir -p %s", api.LLM_VLLM_MODELS_PATH)
- _, err = exec(ctx, lc.CmpId, cmd, 10)
- return err
- }
// InstallModel is a no-op for vLLM: no extra installation step is performed
// after the model files are placed under the models path.
func (v *vllm) InstallModel(ctx context.Context, userCred mcclient.TokenCredential, llm *models.SLLM, dirs []string, mdlIds []string) error {
	return nil
}
// UninstallModel is a no-op: the model directory is intentionally left on
// disk rather than removed.
func (v *vllm) UninstallModel(ctx context.Context, userCred mcclient.TokenCredential, llm *models.SLLM, instMdl *models.SLLMInstantModel) error {
	// Optionally remove the model directory.
	// For safety, we might just log or leave it.
	return nil
}
// resolveHfdRevision returns the HuggingFace revision to download for
// modelTag. An empty or whitespace-only tag falls back to "main".
func resolveHfdRevision(modelTag string) string {
	// Trim once instead of twice as the original did.
	rev := strings.TrimSpace(modelTag)
	if rev == "" {
		return "main"
	}
	return rev
}
// hfModelAPIResponse is the subset of the HuggingFace model-info API
// response used here: the list of files ("siblings") in the repository.
type hfModelAPIResponse struct {
	Siblings []struct {
		RFilename string `json:"rfilename"`
	} `json:"siblings"`
}
// escapeURLPathPreserveSlash percent-encodes every path segment of p while
// keeping the "/" separators intact (url.PathEscape alone would also encode
// the slashes).
func escapeURLPathPreserveSlash(p string) string {
	if p == "" {
		return ""
	}
	segments := strings.Split(p, "/")
	escaped := make([]string, len(segments))
	for i, seg := range segments {
		escaped[i] = url.PathEscape(seg)
	}
	return strings.Join(escaped, "/")
}
// isNonEmptyFile reports whether p exists, is not a directory, and has a
// size greater than zero.
func isNonEmptyFile(p string) bool {
	info, err := os.Stat(p)
	if err != nil {
		return false
	}
	if info.IsDir() {
		return false
	}
	return info.Size() > 0
}
// DownloadModel downloads a HuggingFace model on the host into tmpDir for
// instant-model import. Files are placed under tmpDir/huggingface/<repo> so
// that the resulting archive contains relative paths. Returns the model name
// and the target directory (inside the container) the model should land in.
func (v *vllm) DownloadModel(ctx context.Context, userCred mcclient.TokenCredential, llm *models.SLLM, tmpDir string, modelName string, modelTag string) (string, []string, error) {
	if strings.TrimSpace(tmpDir) == "" {
		return "", nil, errors.Error("tmpDir is empty")
	}
	if strings.TrimSpace(modelName) == "" {
		return "", nil, errors.Error("modelName is empty")
	}
	modelBase := filepath.Base(modelName)
	localDir := filepath.Join(tmpDir, "huggingface", modelBase)
	if err := os.MkdirAll(localDir, 0755); err != nil {
		return "", nil, errors.Wrap(err, "mkdir local model dir")
	}
	// If already downloaded, short-circuit (directory exists and non-empty).
	if entries, err := os.ReadDir(localDir); err == nil && len(entries) > 0 {
		targetDir := path.Join(api.LLM_VLLM_MODELS_PATH, modelBase)
		log.Infof("Model %s already exists in import dir %s", modelName, localDir)
		return modelName, []string{targetDir}, nil
	}
	rev := resolveHfdRevision(modelTag)
	// Query the HF (mirror) model API for the file list of this revision.
	apiURL := fmt.Sprintf("%s/api/models/%s?revision=%s", api.LLM_VLLM_HF_ENDPOINT, escapeURLPathPreserveSlash(modelName), url.QueryEscape(rev))
	log.Infof("Downloading HF model via HF Mirror API: %s", func() string {
		b, _ := json.Marshal(map[string]string{
			"model":    modelName,
			"revision": rev,
			"dir":      localDir,
			"endpoint": api.LLM_VLLM_HF_ENDPOINT,
			"api":      apiURL,
		})
		return string(b)
	}())
	metaBody, err := llm.HttpGet(ctx, apiURL)
	if err != nil {
		return "", nil, errors.Wrapf(err, "fetch hf model metadata failed: %s", apiURL)
	}
	meta := hfModelAPIResponse{}
	if err := json.Unmarshal(metaBody, &meta); err != nil {
		return "", nil, errors.Wrap(err, "unmarshal hf model metadata")
	}
	if len(meta.Siblings) == 0 {
		return "", nil, errors.Errorf("hf model metadata has no siblings: %s", apiURL)
	}
	// Download each repository file; files already present and non-empty are
	// skipped, so an interrupted download can be resumed.
	for _, s := range meta.Siblings {
		rf := strings.TrimSpace(s.RFilename)
		if rf == "" {
			continue
		}
		dst := filepath.Join(localDir, filepath.FromSlash(rf))
		if isNonEmptyFile(dst) {
			continue
		}
		if err := os.MkdirAll(filepath.Dir(dst), 0755); err != nil {
			return "", nil, errors.Wrapf(err, "mkdir for %s", dst)
		}
		fileURL := fmt.Sprintf("%s/%s/resolve/%s/%s", api.LLM_VLLM_HF_ENDPOINT, escapeURLPathPreserveSlash(modelName), url.PathEscape(rev), escapeURLPathPreserveSlash(rf))
		if err := llm.HttpDownloadFile(ctx, fileURL, dst); err != nil {
			return "", nil, errors.Wrapf(err, "download file failed: %s -> %s", fileURL, dst)
		}
	}
	targetDir := path.Join(api.LLM_VLLM_MODELS_PATH, modelBase)
	return modelName, []string{targetDir}, nil
}
|