vllm.go 24 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784
  1. package llm_container
  2. import (
  3. "context"
  4. "encoding/json"
  5. "fmt"
  6. "net/http"
  7. "net/url"
  8. "os"
  9. "path"
  10. "path/filepath"
  11. "strconv"
  12. "strings"
  13. "time"
  14. "unicode"
  15. "yunion.io/x/log"
  16. "yunion.io/x/pkg/errors"
  17. commonapi "yunion.io/x/onecloud/pkg/apis"
  18. computeapi "yunion.io/x/onecloud/pkg/apis/compute"
  19. api "yunion.io/x/onecloud/pkg/apis/llm"
  20. "yunion.io/x/onecloud/pkg/llm/models"
  21. "yunion.io/x/onecloud/pkg/mcclient"
  22. )
// init registers the vLLM driver with the global LLM container driver
// registry so it can be selected by container type.
func init() {
	models.RegisterLLMContainerDriver(newVLLM())
}
// vllm implements models.ILLMContainerDriver for vLLM-based LLM containers.
// It embeds baseDriver for the behavior shared by all container drivers.
type vllm struct {
	baseDriver
}
// newVLLM builds a vllm driver instance tagged with the vLLM container type.
func newVLLM() models.ILLMContainerDriver {
	return &vllm{baseDriver: newBaseDriver(api.LLM_CONTAINER_VLLM)}
}
  32. // escapeShellSingleQuoted escapes s for use inside a single-quoted shell string (each ' becomes '\”).
  33. func escapeShellSingleQuoted(s string) string {
  34. return strings.ReplaceAll(s, "'", "'\\''")
  35. }
  36. func shellQuoteSingle(s string) string {
  37. return "'" + escapeShellSingleQuoted(s) + "'"
  38. }
// protectedVLLMArgKeys lists serve flags that buildVLLMServeFlags always
// sets itself; user-supplied customized args must not override them.
// Note: "swap-space" is deliberately NOT protected — it may be overridden.
var protectedVLLMArgKeys = map[string]struct{}{
	"model":                {},
	"served-model-name":    {},
	"port":                 {},
	"tensor-parallel-size": {},
}
  45. func validateVLLMArgKey(key string) error {
  46. if key == "" {
  47. return errors.Error("vllm arg key is empty")
  48. }
  49. if strings.HasPrefix(key, "--") {
  50. return errors.Errorf("invalid vllm arg key %q: do not include leading --", key)
  51. }
  52. for _, r := range key {
  53. if unicode.IsLetter(r) || unicode.IsDigit(r) || r == '-' || r == '_' {
  54. continue
  55. }
  56. return errors.Errorf("invalid vllm arg key %q", key)
  57. }
  58. if _, ok := protectedVLLMArgKeys[key]; ok {
  59. return errors.Errorf("vllm arg key %q is protected", key)
  60. }
  61. return nil
  62. }
  63. func normalizeVLLMCustomizedArgs(args []*api.VllmCustomizedArg) ([]*api.VllmCustomizedArg, error) {
  64. if len(args) == 0 {
  65. return nil, nil
  66. }
  67. out := make([]*api.VllmCustomizedArg, 0, len(args))
  68. indexByKey := make(map[string]int, len(args))
  69. for _, arg := range args {
  70. if arg == nil {
  71. continue
  72. }
  73. key := strings.TrimSpace(arg.Key)
  74. if err := validateVLLMArgKey(key); err != nil {
  75. return nil, err
  76. }
  77. next := &api.VllmCustomizedArg{
  78. Key: key,
  79. Value: arg.Value,
  80. }
  81. if idx, ok := indexByKey[key]; ok {
  82. out[idx] = next
  83. continue
  84. }
  85. indexByKey[key] = len(out)
  86. out = append(out, next)
  87. }
  88. if len(out) == 0 {
  89. return nil, nil
  90. }
  91. return out, nil
  92. }
  93. func mergeVLLMCustomizedArgs(base, overrides []*api.VllmCustomizedArg) ([]*api.VllmCustomizedArg, error) {
  94. out := make([]*api.VllmCustomizedArg, 0, len(base)+len(overrides))
  95. indexByKey := make(map[string]int, len(base)+len(overrides))
  96. appendNormalized := func(items []*api.VllmCustomizedArg) error {
  97. normalized, err := normalizeVLLMCustomizedArgs(items)
  98. if err != nil {
  99. return err
  100. }
  101. for _, arg := range normalized {
  102. if idx, ok := indexByKey[arg.Key]; ok {
  103. out[idx] = arg
  104. continue
  105. }
  106. indexByKey[arg.Key] = len(out)
  107. out = append(out, arg)
  108. }
  109. return nil
  110. }
  111. if err := appendNormalized(base); err != nil {
  112. return nil, err
  113. }
  114. if err := appendNormalized(overrides); err != nil {
  115. return nil, err
  116. }
  117. if len(out) == 0 {
  118. return nil, nil
  119. }
  120. return out, nil
  121. }
  122. func buildVLLMServeFlags(modelPath string, tensorParallelSize, defaultSwapSpaceGiB int, effSpec *api.LLMSpecVllm) []string {
  123. modelQuoted := shellQuoteSingle(modelPath)
  124. flags := []string{
  125. fmt.Sprintf("--model %s", modelQuoted),
  126. fmt.Sprintf(`--served-model-name "$(basename %s)"`, modelQuoted),
  127. fmt.Sprintf("--port %d", api.LLM_VLLM_DEFAULT_PORT),
  128. fmt.Sprintf("--tensor-parallel-size %d", tensorParallelSize),
  129. fmt.Sprintf("--swap-space %d", defaultSwapSpaceGiB),
  130. }
  131. if effSpec == nil || len(effSpec.CustomizedArgs) == 0 {
  132. return flags
  133. }
  134. normalizedArgs, err := normalizeVLLMCustomizedArgs(effSpec.CustomizedArgs)
  135. if err != nil {
  136. log.Errorf("normalize vllm customized args: %v", err)
  137. return flags
  138. }
  139. for _, arg := range normalizedArgs {
  140. flagName := "--" + arg.Key
  141. if arg.Key == "swap-space" {
  142. if arg.Value == "" {
  143. flags[4] = flagName
  144. } else {
  145. flags[4] = fmt.Sprintf("%s %s", flagName, shellQuoteSingle(arg.Value))
  146. }
  147. continue
  148. }
  149. if arg.Value == "" {
  150. flags = append(flags, flagName)
  151. continue
  152. }
  153. flags = append(flags, fmt.Sprintf("%s %s", flagName, shellQuoteSingle(arg.Value)))
  154. }
  155. return flags
  156. }
  157. func (v *vllm) GetSpec(sku *models.SLLMSku) interface{} {
  158. if sku == nil || sku.LLMType != string(api.LLM_CONTAINER_VLLM) || sku.LLMSpec == nil || sku.LLMSpec.Vllm == nil {
  159. return nil
  160. }
  161. return sku.LLMSpec.Vllm
  162. }
// GetEffectiveSpec merges the SKU-level vLLM spec with the LLM-level one.
// The LLM's preferred_model (when non-empty) and customized args override
// the SKU's. Returns nil when neither side carries a vLLM spec; otherwise
// the result is a *api.LLMSpecVllm.
func (v *vllm) GetEffectiveSpec(llm *models.SLLM, sku *models.SLLMSku) interface{} {
	var skuSpec *api.LLMSpecVllm
	if s := v.GetSpec(sku); s != nil {
		skuSpec = s.(*api.LLMSpecVllm)
	}
	var llmSpec *api.LLMSpecVllm
	if llm != nil && llm.LLMSpec != nil && llm.LLMSpec.Vllm != nil {
		llmSpec = llm.LLMSpec.Vllm
	}
	if skuSpec == nil && llmSpec == nil {
		return nil
	}
	out := &api.LLMSpecVllm{}
	if skuSpec != nil {
		out.PreferredModel = skuSpec.PreferredModel
		out.CustomizedArgs = skuSpec.CustomizedArgs
	}
	if llmSpec != nil {
		// preferred_model: the LLM level wins only when explicitly set.
		if llmSpec.PreferredModel != "" {
			out.PreferredModel = llmSpec.PreferredModel
		}
	}
	// Phase 1: merging with nil purely normalizes the SKU args. On failure
	// drop them but keep going so the LLM-level args can still apply.
	mergedArgs, err := mergeVLLMCustomizedArgs(out.CustomizedArgs, nil)
	if err != nil {
		log.Errorf("normalize sku vllm customized args: %v", err)
		out.CustomizedArgs = nil
	} else {
		out.CustomizedArgs = mergedArgs
	}
	// Phase 2: layer the LLM-level args on top; on failure keep whatever
	// survived phase 1.
	if llmSpec != nil {
		mergedArgs, err = mergeVLLMCustomizedArgs(out.CustomizedArgs, llmSpec.CustomizedArgs)
		if err != nil {
			log.Errorf("merge vllm customized args: %v", err)
		} else {
			out.CustomizedArgs = mergedArgs
		}
	}
	return out
}
  202. func (v *vllm) ValidateLLMSkuCreateData(ctx context.Context, userCred mcclient.TokenCredential, input *api.LLMSkuCreateInput) (*api.LLMSkuCreateInput, error) {
  203. input, err := v.baseDriver.ValidateLLMSkuCreateData(ctx, userCred, input)
  204. if err != nil {
  205. return nil, err
  206. }
  207. // Reuse ValidateLLMCreateSpec; ensure llm_spec.vllm always exists for vLLM SKU.
  208. spec, err := v.ValidateLLMCreateSpec(ctx, userCred, nil, input.LLMSpec)
  209. if err != nil {
  210. return nil, err
  211. }
  212. if spec == nil {
  213. spec = &api.LLMSpec{Vllm: &api.LLMSpecVllm{}}
  214. } else if spec.Vllm == nil {
  215. spec.Vllm = &api.LLMSpecVllm{}
  216. }
  217. input.LLMSpec = spec
  218. return input, nil
  219. }
  220. func (v *vllm) ValidateLLMSkuUpdateData(ctx context.Context, userCred mcclient.TokenCredential, sku *models.SLLMSku, input *api.LLMSkuUpdateInput) (*api.LLMSkuUpdateInput, error) {
  221. input, err := v.baseDriver.ValidateLLMSkuUpdateData(ctx, userCred, sku, input)
  222. if err != nil {
  223. return nil, err
  224. }
  225. if input.LLMSpec == nil {
  226. return input, nil
  227. }
  228. // Reuse ValidateLLMUpdateSpec by treating current SKU spec as the "current llm spec".
  229. fakeLLM := &models.SLLM{LLMSpec: sku.LLMSpec}
  230. spec, err := v.ValidateLLMUpdateSpec(ctx, userCred, fakeLLM, input.LLMSpec)
  231. if err != nil {
  232. return nil, err
  233. }
  234. input.LLMSpec = spec
  235. if input.LLMSpec != nil && input.LLMSpec.Vllm == nil {
  236. input.LLMSpec.Vllm = &api.LLMSpecVllm{}
  237. }
  238. return input, nil
  239. }
  240. // ValidateLLMCreateSpec implements ILLMContainerDriver. Merges preferred_model from SKU when input's is empty.
  241. func (v *vllm) ValidateLLMCreateSpec(ctx context.Context, userCred mcclient.TokenCredential, sku *models.SLLMSku, input *api.LLMSpec) (*api.LLMSpec, error) {
  242. if input == nil {
  243. return nil, nil
  244. }
  245. if input.Vllm == nil {
  246. input.Vllm = &api.LLMSpecVllm{}
  247. }
  248. preferred := input.Vllm.PreferredModel
  249. if preferred == "" && sku != nil && sku.LLMSpec != nil && sku.LLMSpec.Vllm != nil {
  250. preferred = sku.LLMSpec.Vllm.PreferredModel
  251. }
  252. spec := &api.LLMSpecVllm{}
  253. if sku != nil && sku.LLMSpec != nil && sku.LLMSpec.Vllm != nil {
  254. base := *sku.LLMSpec.Vllm
  255. spec = &base
  256. }
  257. // Apply create overrides
  258. if preferred != "" {
  259. spec.PreferredModel = preferred
  260. }
  261. mergedArgs, err := mergeVLLMCustomizedArgs(spec.CustomizedArgs, input.Vllm.CustomizedArgs)
  262. if err != nil {
  263. return nil, err
  264. }
  265. spec.CustomizedArgs = mergedArgs
  266. return &api.LLMSpec{Vllm: spec}, nil
  267. }
  268. // ValidateLLMUpdateSpec implements ILLMContainerDriver. Merges preferred_model with current LLM spec; only overwrite when non-empty.
  269. func (v *vllm) ValidateLLMUpdateSpec(ctx context.Context, userCred mcclient.TokenCredential, llm *models.SLLM, input *api.LLMSpec) (*api.LLMSpec, error) {
  270. if input == nil || input.Vllm == nil {
  271. return input, nil
  272. }
  273. base := &api.LLMSpecVllm{}
  274. if llm != nil && llm.LLMSpec != nil && llm.LLMSpec.Vllm != nil {
  275. b := *llm.LLMSpec.Vllm
  276. base = &b
  277. }
  278. // preferred_model: only overwrite when non-empty
  279. if input.Vllm.PreferredModel != "" {
  280. base.PreferredModel = input.Vllm.PreferredModel
  281. }
  282. mergedArgs, err := mergeVLLMCustomizedArgs(base.CustomizedArgs, input.Vllm.CustomizedArgs)
  283. if err != nil {
  284. return nil, err
  285. }
  286. base.CustomizedArgs = mergedArgs
  287. return &api.LLMSpec{Vllm: base}, nil
  288. }
// GetContainerSpec builds the pod container create input for a vLLM LLM.
// The container entrypoint only keeps the container alive; vLLM itself is
// started later by StartLLM via exec. GPU devices come either from
// explicitly scheduled isolated devices (by id) or, when none were given,
// from the SKU's device list (by index). The LLM data disk is mounted for
// models plus a dedicated HF cache directory.
func (v *vllm) GetContainerSpec(ctx context.Context, llm *models.SLLM, image *models.SLLMImage, sku *models.SLLMSku, props []string, devices []computeapi.SIsolatedDevice, diskId string) *computeapi.PodContainerCreateInput {
	// Container entrypoint only keeps the container alive; vLLM is started by StartLLM via exec.
	startScript := `mkdir -p ` + api.LLM_VLLM_MODELS_PATH + ` && exec sleep infinity`
	envs := []*commonapi.ContainerKeyValue{
		{
			// HF hub cache location — presumably under the cache volume
			// mounted below; confirm against LLM_VLLM_CACHE_DIR.
			Key:   "HUGGING_FACE_HUB_CACHE",
			Value: api.LLM_VLLM_CACHE_DIR,
		},
		{
			Key:   "HF_ENDPOINT",
			Value: api.LLM_VLLM_HF_ENDPOINT,
		},
		// // Fix Error 803
		// {
		// 	Key:   "LD_LIBRARY_PATH",
		// 	Value: "/lib64:/usr/local/cuda/lib64:/lib/x86_64-linux-gnu:${LD_LIBRARY_PATH}",
		// },
		// // Fix Error 803
		// {
		// 	Key:   "LD_PRELOAD",
		// 	Value: "/lib/libcuda.so.1 /lib/libnvidia-ptxjitcompiler.so.1 /lib/libnvidia-gpucomp.so",
		// },
	}
	spec := computeapi.ContainerSpec{
		ContainerSpec: commonapi.ContainerSpec{
			Image:             image.ToContainerImage(),
			ImageCredentialId: image.CredentialId,
			Command:           []string{"/bin/sh", "-c"},
			Args:              []string{startScript},
			EnableLxcfs:       true,
			AlwaysRestart:     true,
			Envs:              envs,
		},
	}
	// GPU Devices
	if len(devices) == 0 && (sku.Devices != nil && len(*sku.Devices) > 0) {
		for i := range *sku.Devices {
			// Copy the loop index: each device entry needs its own pointer.
			index := i
			spec.Devices = append(spec.Devices, &computeapi.ContainerDevice{
				Type: commonapi.CONTAINER_DEVICE_TYPE_ISOLATED_DEVICE,
				IsolatedDevice: &computeapi.ContainerIsolatedDevice{
					Index: &index,
				},
			})
		}
	} else if len(devices) > 0 {
		for i := range devices {
			spec.Devices = append(spec.Devices, &computeapi.ContainerDevice{
				Type: commonapi.CONTAINER_DEVICE_TYPE_ISOLATED_DEVICE,
				IsolatedDevice: &computeapi.ContainerIsolatedDevice{
					Id: devices[i].Id,
				},
			})
		}
	}
	// Volume Mounts
	diskIndex := 0
	postOverlays, err := llm.GetMountedModelsPostOverlay()
	if err != nil {
		// Best-effort: the container is still created without post-overlays.
		log.Errorf("GetMountedModelsPostOverlay failed %s", err)
	}
	ctrVols := []*commonapi.ContainerVolumeMount{
		{
			Disk: &commonapi.ContainerVolumeMountDisk{
				SubDirectory: api.LLM_VLLM,
				Index:        &diskIndex,
				PostOverlay:  postOverlays,
			},
			Type:        commonapi.CONTAINER_VOLUME_MOUNT_TYPE_DISK,
			MountPath:   api.LLM_VLLM_BASE_PATH,
			ReadOnly:    false,
			Propagation: commonapi.MOUNTPROPAGATION_PROPAGATION_HOST_TO_CONTAINER,
		},
		{
			// Mount cache dir to save HF cache
			Disk: &commonapi.ContainerVolumeMountDisk{
				SubDirectory: "cache",
				Index:        &diskIndex,
			},
			Type:      commonapi.CONTAINER_VOLUME_MOUNT_TYPE_DISK,
			MountPath: "/root/.cache",
			ReadOnly:  false,
		},
	}
	spec.VolumeMounts = append(spec.VolumeMounts, ctrVols...)
	return &computeapi.PodContainerCreateInput{
		ContainerSpec: spec,
	}
}
  378. func (v *vllm) GetContainerSpecs(ctx context.Context, llm *models.SLLM, image *models.SLLMImage, sku *models.SLLMSku, props []string, devices []computeapi.SIsolatedDevice, diskId string) []*computeapi.PodContainerCreateInput {
  379. return []*computeapi.PodContainerCreateInput{
  380. v.GetContainerSpec(ctx, llm, image, sku, props, devices, diskId),
  381. }
  382. }
// GetLLMAccessUrlInfo resolves the reachable URL info for the vLLM HTTP
// endpoint (scheme "http" on the default vLLM port).
func (v *vllm) GetLLMAccessUrlInfo(ctx context.Context, userCred mcclient.TokenCredential, llm *models.SLLM, input *models.LLMAccessInfoInput) (*api.LLMAccessUrlInfo, error) {
	return models.GetLLMAccessUrlInfo(ctx, userCred, llm, input, "http", api.LLM_VLLM_DEFAULT_PORT)
}
  386. func buildVLLMHealthCheckURL(networkType, llmIP, hostAccessIP string, accessInfo *models.SAccessInfo) (string, error) {
  387. if networkType == string(computeapi.NETWORK_TYPE_GUEST) {
  388. if len(llmIP) == 0 {
  389. return "", errors.Error("LLM IP is empty for guest network")
  390. }
  391. return fmt.Sprintf("http://%s:%d/health", llmIP, api.LLM_VLLM_DEFAULT_PORT), nil
  392. }
  393. if len(llmIP) > 0 {
  394. return fmt.Sprintf("http://%s:%d/health", llmIP, api.LLM_VLLM_DEFAULT_PORT), nil
  395. }
  396. if len(hostAccessIP) == 0 {
  397. return "", errors.Error("host access IP is empty")
  398. }
  399. port := api.LLM_VLLM_DEFAULT_PORT
  400. if accessInfo != nil && accessInfo.AccessPort > 0 {
  401. port = accessInfo.AccessPort
  402. }
  403. return fmt.Sprintf("http://%s:%d/health", hostAccessIP, port), nil
  404. }
  405. // resolveModelPath resolves the model directory inside the container.
  406. // It prefers preferredPath when it exists; otherwise it picks the first directory under models path.
  407. // Returns (empty, nil) when no model is found.
  408. func (v *vllm) resolveModelPath(ctx context.Context, containerId string, preferredPath string) (string, error) {
  409. preferredQuoted := shellQuoteSingle(preferredPath)
  410. cmd := fmt.Sprintf(
  411. `mkdir -p %s;
  412. preferred=%s;
  413. if [ -n "$preferred" ] && [ -d "$preferred" ]; then model="$preferred"; else model=$(ls -d %s/* 2>/dev/null | head -n 1); fi;
  414. if [ -z "$model" ]; then echo "NO_MODEL"; exit 0; fi;
  415. printf '%%s\n' "$model"`,
  416. api.LLM_VLLM_MODELS_PATH,
  417. preferredQuoted,
  418. api.LLM_VLLM_MODELS_PATH,
  419. )
  420. out, err := exec(ctx, containerId, cmd, 30)
  421. if err != nil {
  422. return "", errors.Wrap(err, "exec resolve model path")
  423. }
  424. out = strings.TrimSpace(out)
  425. if out == "NO_MODEL" || out == "" {
  426. return "", nil
  427. }
  428. return out, nil
  429. }
// StartLLM starts the vLLM server inside the container via exec, then
// polls its /health endpoint until it answers 200, the timeout elapses,
// or ctx is cancelled. Returns nil without starting anything when no model
// directory can be resolved.
func (v *vllm) StartLLM(ctx context.Context, userCred mcclient.TokenCredential, llm *models.SLLM) error {
	lc, err := llm.GetLLMContainer()
	if err != nil {
		return errors.Wrap(err, "get llm container")
	}
	sku, err := llm.GetLLMSku(llm.LLMSkuId)
	if err != nil {
		return errors.Wrap(err, "get llm sku")
	}
	// One tensor-parallel rank per SKU device (default 1).
	tensorParallelSize := 1
	if sku.Devices != nil && len(*sku.Devices) > 0 {
		tensorParallelSize = len(*sku.Devices)
	}
	// Swap space in GiB: half the SKU memory (sku.Memory presumably in
	// MiB — TODO confirm), with a 1 GiB floor.
	swapSpaceGiB := (sku.Memory * 1) / (2 * 1024)
	if swapSpaceGiB < 1 {
		swapSpaceGiB = 1
	}
	effSpec := (*api.LLMSpecVllm)(nil)
	preferredPath := ""
	if eff := v.GetEffectiveSpec(llm, sku); eff != nil {
		effSpec = eff.(*api.LLMSpecVllm)
		if preferred := effSpec.PreferredModel; preferred != "" {
			preferredPath = path.Join(api.LLM_VLLM_MODELS_PATH, preferred)
		}
	}
	modelPath, err := v.resolveModelPath(ctx, lc.CmpId, preferredPath)
	if err != nil {
		return err
	}
	if modelPath == "" {
		return nil // no model
	}
	// Launch vLLM detached; its output goes to /tmp/vllm.log in-container.
	startCmd := fmt.Sprintf(
		"nohup %s %s > /tmp/vllm.log 2>&1 &",
		api.LLM_VLLM_EXEC_PATH,
		strings.Join(buildVLLMServeFlags(modelPath, tensorParallelSize, swapSpaceGiB, effSpec), " "),
	)
	_, err = exec(ctx, lc.CmpId, startCmd, 30)
	if err != nil {
		log.Errorf("vLLM start failed, exec command: %s", startCmd)
		return errors.Wrapf(err, "exec start vLLM, command: %s", startCmd)
	}
	cmd := startCmd
	// Wait for health endpoint
	input, err := llm.GetLLMAccessInfoInput(ctx, userCred)
	if err != nil {
		return errors.Wrap(err, "get llm url for health check")
	}
	// Prefer the access info whose listen port is the vLLM port; otherwise
	// fall back to the first entry.
	var accessInfo *models.SAccessInfo
	for i := range input.AccessInfos {
		if input.AccessInfos[i].ListenPort == api.LLM_VLLM_DEFAULT_PORT {
			accessInfo = &input.AccessInfos[i]
			break
		}
	}
	if accessInfo == nil && len(input.AccessInfos) > 0 {
		accessInfo = &input.AccessInfos[0]
	}
	healthURL, err := buildVLLMHealthCheckURL(llm.NetworkType, llm.LLMIp, input.HostInternalIp, accessInfo)
	if err != nil {
		return errors.Wrap(err, "build health check url")
	}
	// Poll until 200 OK, deadline, or ctx cancellation. Non-200 responses
	// and transport errors are retried after the configured interval.
	deadline := time.Now().Add(api.LLM_VLLM_HEALTH_CHECK_TIMEOUT)
	for time.Now().Before(deadline) {
		req, err := http.NewRequestWithContext(ctx, http.MethodGet, healthURL, nil)
		if err != nil {
			return errors.Wrap(err, "new health check request")
		}
		resp, err := http.DefaultClient.Do(req)
		if err == nil {
			resp.Body.Close()
			if resp.StatusCode == http.StatusOK {
				return nil
			}
		}
		select {
		case <-ctx.Done():
			return errors.Wrap(ctx.Err(), "context done while waiting for vLLM")
		case <-time.After(api.LLM_VLLM_HEALTH_CHECK_INTERVAL):
			// continue
		}
	}
	// Optionally read last lines of /tmp/vllm.log for better error message
	logTail, _ := exec(ctx, lc.CmpId, "tail -n 20 /tmp/vllm.log 2>/dev/null || true", 5)
	if logTail != "" {
		return errors.Errorf("vLLM health check timeout after %v, exec command: %s, last log: %s", api.LLM_VLLM_HEALTH_CHECK_TIMEOUT, cmd, strings.TrimSpace(logTail))
	}
	return errors.Errorf("vLLM health check timeout after %v, exec command: %s", api.LLM_VLLM_HEALTH_CHECK_TIMEOUT, cmd)
}
// ILLMContainerInstantApp implementation

// GetProbedInstantModelsExt lists locally present model directories inside
// the container by running `du -sk` over the models path. Each directory
// name is reported as one model (tag "latest") with its on-disk size in
// bytes. mdlIds is currently ignored. A failing du (e.g. no models dir
// yet) yields an empty map rather than an error.
func (v *vllm) GetProbedInstantModelsExt(ctx context.Context, userCred mcclient.TokenCredential, llm *models.SLLM, mdlIds ...string) (map[string]api.LLMInternalInstantMdlInfo, error) {
	lc, err := llm.GetLLMContainer()
	if err != nil {
		return nil, errors.Wrap(err, "get llm container")
	}
	// List directories in models path
	cmd := fmt.Sprintf("du -sk %s/*/", api.LLM_VLLM_MODELS_PATH)
	output, err := exec(ctx, lc.CmpId, cmd, 10)
	if err != nil {
		// If ls fails, maybe no directory yet, return empty
		return make(map[string]api.LLMInternalInstantMdlInfo), nil
	}
	modelsMap := make(map[string]api.LLMInternalInstantMdlInfo)
	lines := strings.Split(strings.TrimSpace(output), "\n")
	for _, line := range lines {
		// du output format: "<size-kb>\t<path>/"
		fields := strings.Fields(line)
		if len(fields) < 2 {
			continue
		}
		// Size is in KB; a parse failure deliberately falls back to 0.
		sizeKB, _ := strconv.ParseInt(fields[0], 10, 64)
		fullPath := fields[1]
		name := path.Base(fullPath)
		if name == "" {
			continue
		}
		// We treat the directory name as the model name
		// For vLLM, name usually implies "organization/model" if downloaded from HF, but here we just list local dirs
		modelsMap[name] = api.LLMInternalInstantMdlInfo{
			Name:    name,
			ModelId: name,
			Tag:     "latest",
			Size:    sizeKB * 1024,
		}
	}
	return modelsMap, nil
}
  558. func (v *vllm) DetectModelPaths(ctx context.Context, userCred mcclient.TokenCredential, llm *models.SLLM, pkgInfo api.LLMInternalInstantMdlInfo) ([]string, error) {
  559. lc, err := llm.GetLLMContainer()
  560. if err != nil {
  561. return nil, errors.Wrap(err, "get llm container")
  562. }
  563. modelPath := path.Join(api.LLM_VLLM_MODELS_PATH, pkgInfo.Name)
  564. checkCmd := fmt.Sprintf("[ -d '%s' ] && echo 'EXIST' || echo 'MISSING'", modelPath)
  565. output, err := exec(ctx, lc.CmpId, checkCmd, 10)
  566. if err != nil {
  567. return nil, errors.Wrap(err, "failed to check file existence")
  568. }
  569. if !strings.Contains(output, "EXIST") {
  570. return nil, errors.Errorf("model directory %s missing", modelPath)
  571. }
  572. return []string{modelPath}, nil
  573. }
  574. func (v *vllm) GetImageInternalPathMounts(sApp *models.SInstantModel) map[string]string {
  575. // Map host paths to container paths
  576. // For vLLM simple volume mount, this might be 1:1 or based on the base path
  577. res := make(map[string]string)
  578. for _, mount := range sApp.Mounts {
  579. relPath := strings.TrimPrefix(mount, api.LLM_VLLM_BASE_PATH)
  580. res[relPath] = path.Join(api.LLM_VLLM, relPath)
  581. }
  582. return res
  583. }
  584. func (v *vllm) GetSaveDirectories(sApp *models.SInstantModel) (string, []string, error) {
  585. var filteredMounts []string
  586. for _, mount := range sApp.Mounts {
  587. if strings.HasPrefix(mount, api.LLM_VLLM_BASE_PATH) {
  588. relPath := strings.TrimPrefix(mount, api.LLM_VLLM_BASE_PATH)
  589. filteredMounts = append(filteredMounts, relPath)
  590. }
  591. }
  592. return "", filteredMounts, nil
  593. }
// ValidateMounts accepts the mounts unchanged; vLLM imposes no extra
// constraints on instant-model mount paths.
func (v *vllm) ValidateMounts(mounts []string, mdlName string, mdlTag string) ([]string, error) {
	return mounts, nil
}
// CheckDuplicateMounts returns the user-facing message for duplicate mount
// conflicts; errStr and dupIndex are ignored for vLLM.
func (v *vllm) CheckDuplicateMounts(errStr string, dupIndex int) string {
	return "Duplicate mounts detected"
}
// GetInstantModelIdByPostOverlay always returns "" for vLLM: post-overlays
// are not mapped back to instant model ids by this driver.
func (v *vllm) GetInstantModelIdByPostOverlay(postOverlay *commonapi.ContainerVolumeMountDiskPostOverlay, mdlNameToId map[string]string) string {
	return ""
}
  603. func (v *vllm) GetDirPostOverlay(dir api.LLMMountDirInfo) *commonapi.ContainerVolumeMountDiskPostOverlay {
  604. uid := int64(1000)
  605. gid := int64(1000)
  606. ov := dir.ToOverlay()
  607. ov.FsUser = &uid
  608. ov.FsGroup = &gid
  609. return &ov
  610. }
  611. func (v *vllm) PreInstallModel(ctx context.Context, userCred mcclient.TokenCredential, llm *models.SLLM, instMdl *models.SLLMInstantModel) error {
  612. lc, err := llm.GetLLMContainer()
  613. if err != nil {
  614. return errors.Wrap(err, "get llm container")
  615. }
  616. // Create base directory
  617. cmd := fmt.Sprintf("mkdir -p %s", api.LLM_VLLM_MODELS_PATH)
  618. _, err = exec(ctx, lc.CmpId, cmd, 10)
  619. return err
  620. }
// InstallModel is a no-op for vLLM; model directories are used in place,
// so dirs and mdlIds are ignored.
func (v *vllm) InstallModel(ctx context.Context, userCred mcclient.TokenCredential, llm *models.SLLM, dirs []string, mdlIds []string) error {
	return nil
}
// UninstallModel is intentionally a no-op for vLLM.
func (v *vllm) UninstallModel(ctx context.Context, userCred mcclient.TokenCredential, llm *models.SLLM, instMdl *models.SLLMInstantModel) error {
	// Optionally remove the model directory
	// For safety, we might just log or leave it
	return nil
}
  629. func resolveHfdRevision(modelTag string) string {
  630. if strings.TrimSpace(modelTag) == "" {
  631. return "main"
  632. }
  633. return strings.TrimSpace(modelTag)
  634. }
// hfModelAPIResponse is the subset of the HuggingFace model-info API
// response consumed here: the list of files ("siblings") in the repo.
type hfModelAPIResponse struct {
	Siblings []struct {
		RFilename string `json:"rfilename"`
	} `json:"siblings"`
}
  640. func escapeURLPathPreserveSlash(p string) string {
  641. if p == "" {
  642. return ""
  643. }
  644. parts := strings.Split(p, "/")
  645. for i := range parts {
  646. parts[i] = url.PathEscape(parts[i])
  647. }
  648. return strings.Join(parts, "/")
  649. }
  650. func isNonEmptyFile(p string) bool {
  651. st, err := os.Stat(p)
  652. if err != nil {
  653. return false
  654. }
  655. return !st.IsDir() && st.Size() > 0
  656. }
// DownloadModel downloads a HuggingFace model on the host into tmpDir for
// the instant-model import flow. Files are placed under
// tmpDir/huggingface/<repo-basename> so the archive contains relative
// paths. It returns the model name and the in-container target directory.
// The download is resumable: a non-empty local dir short-circuits, and
// per-file downloads skip files that already exist non-empty.
func (v *vllm) DownloadModel(ctx context.Context, userCred mcclient.TokenCredential, llm *models.SLLM, tmpDir string, modelName string, modelTag string) (string, []string, error) {
	// Download HF model on host into tmpDir for instant-model import.
	// We place files under tmpDir/huggingface/<repo> so that the archive contains relative paths.
	if strings.TrimSpace(tmpDir) == "" {
		return "", nil, errors.Error("tmpDir is empty")
	}
	if strings.TrimSpace(modelName) == "" {
		return "", nil, errors.Error("modelName is empty")
	}
	modelBase := filepath.Base(modelName)
	localDir := filepath.Join(tmpDir, "huggingface", modelBase)
	if err := os.MkdirAll(localDir, 0755); err != nil {
		return "", nil, errors.Wrap(err, "mkdir local model dir")
	}
	// If already downloaded, short-circuit (directory exists and non-empty).
	if entries, err := os.ReadDir(localDir); err == nil && len(entries) > 0 {
		targetDir := path.Join(api.LLM_VLLM_MODELS_PATH, modelBase)
		log.Infof("Model %s already exists in import dir %s", modelName, localDir)
		return modelName, []string{targetDir}, nil
	}
	rev := resolveHfdRevision(modelTag)
	// Ask the HF (mirror) API which files the repo contains at this revision.
	apiURL := fmt.Sprintf("%s/api/models/%s?revision=%s", api.LLM_VLLM_HF_ENDPOINT, escapeURLPathPreserveSlash(modelName), url.QueryEscape(rev))
	log.Infof("Downloading HF model via HF Mirror API: %s", func() string {
		b, _ := json.Marshal(map[string]string{
			"model":    modelName,
			"revision": rev,
			"dir":      localDir,
			"endpoint": api.LLM_VLLM_HF_ENDPOINT,
			"api":      apiURL,
		})
		return string(b)
	}())
	metaBody, err := llm.HttpGet(ctx, apiURL)
	if err != nil {
		return "", nil, errors.Wrapf(err, "fetch hf model metadata failed: %s", apiURL)
	}
	meta := hfModelAPIResponse{}
	if err := json.Unmarshal(metaBody, &meta); err != nil {
		return "", nil, errors.Wrap(err, "unmarshal hf model metadata")
	}
	if len(meta.Siblings) == 0 {
		return "", nil, errors.Errorf("hf model metadata has no siblings: %s", apiURL)
	}
	// Fetch each repo file via the resolve endpoint, skipping files that
	// are already present and non-empty (resume support).
	for _, s := range meta.Siblings {
		rf := strings.TrimSpace(s.RFilename)
		if rf == "" {
			continue
		}
		dst := filepath.Join(localDir, filepath.FromSlash(rf))
		if isNonEmptyFile(dst) {
			continue
		}
		if err := os.MkdirAll(filepath.Dir(dst), 0755); err != nil {
			return "", nil, errors.Wrapf(err, "mkdir for %s", dst)
		}
		fileURL := fmt.Sprintf("%s/%s/resolve/%s/%s", api.LLM_VLLM_HF_ENDPOINT, escapeURLPathPreserveSlash(modelName), url.PathEscape(rev), escapeURLPathPreserveSlash(rf))
		if err := llm.HttpDownloadFile(ctx, fileURL, dst); err != nil {
			return "", nil, errors.Wrapf(err, "download file failed: %s -> %s", fileURL, dst)
		}
	}
	targetDir := path.Join(api.LLM_VLLM_MODELS_PATH, modelBase)
	return modelName, []string{targetDir}, nil
}