| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
703704705706707708709710711712713714715716717718719720 |
- /*
- Copyright The containerd Authors.
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
- http://www.apache.org/licenses/LICENSE-2.0
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- */
- package containerd
- import (
- "bytes"
- "context"
- "encoding/json"
- "errors"
- "fmt"
- "io"
- goruntime "runtime"
- "strings"
- "syscall"
- "time"
- "github.com/containerd/containerd/api/services/tasks/v1"
- "github.com/containerd/containerd/api/types"
- "github.com/containerd/containerd/cio"
- "github.com/containerd/containerd/content"
- "github.com/containerd/containerd/diff"
- "github.com/containerd/containerd/errdefs"
- "github.com/containerd/containerd/images"
- "github.com/containerd/containerd/mount"
- "github.com/containerd/containerd/oci"
- "github.com/containerd/containerd/plugin"
- "github.com/containerd/containerd/protobuf"
- google_protobuf "github.com/containerd/containerd/protobuf/types"
- "github.com/containerd/containerd/rootfs"
- "github.com/containerd/containerd/runtime/linux/runctypes"
- "github.com/containerd/containerd/runtime/v2/runc/options"
- "github.com/containerd/typeurl/v2"
- digest "github.com/opencontainers/go-digest"
- is "github.com/opencontainers/image-spec/specs-go"
- v1 "github.com/opencontainers/image-spec/specs-go/v1"
- specs "github.com/opencontainers/runtime-spec/specs-go"
- )
// UnknownExitStatus is the exit code reported when containerd cannot
// determine how a process exited, for example because the process never
// started or because retrieving its exit status failed. Its value is 255.
const UnknownExitStatus = 255
// Formats used to build the default name of a checkpoint image.
const (
	// checkpointDateFormat is the time layout of the timestamp embedded in a
	// default checkpoint name.
	checkpointDateFormat = "01-02-2006-15:04:05"
	// checkpointNameFormat is the fmt pattern of a default checkpoint name; it
	// takes two string arguments.
	checkpointNameFormat = "containerd.io/checkpoint/%s:%s"
)
- // Status returns process status and exit information
- type Status struct {
- // Status of the process
- Status ProcessStatus
- // ExitStatus returned by the process
- ExitStatus uint32
- // ExitedTime is the time at which the process died
- ExitTime time.Time
- }
- // ProcessInfo provides platform specific process information
- type ProcessInfo struct {
- // Pid is the process ID
- Pid uint32
- // Info includes additional process information
- // Info varies by platform
- Info *google_protobuf.Any
- }
// ProcessStatus is a human readable description of the current state of a
// Process.
type ProcessStatus string
- const (
- // Running indicates the process is currently executing
- Running ProcessStatus = "running"
- // Created indicates the process has been created within containerd but the
- // user's defined process has not started
- Created ProcessStatus = "created"
- // Stopped indicates that the process has ran and exited
- Stopped ProcessStatus = "stopped"
- // Paused indicates that the process is currently paused
- Paused ProcessStatus = "paused"
- // Pausing indicates that the process is currently switching from a
- // running state into a paused state
- Pausing ProcessStatus = "pausing"
- // Unknown indicates that we could not determine the status from the runtime
- Unknown ProcessStatus = "unknown"
- )
// IOCloseInfo selects which io pipes of a process should be closed.
type IOCloseInfo struct {
	// Stdin requests that the stdin of the process be closed.
	Stdin bool
}
- // IOCloserOpts allows the caller to set specific pipes as closed on a process
- type IOCloserOpts func(*IOCloseInfo)
- // WithStdinCloser closes the stdin of a process
- func WithStdinCloser(r *IOCloseInfo) {
- r.Stdin = true
- }
- // CheckpointTaskInfo allows specific checkpoint information to be set for the task
- type CheckpointTaskInfo struct {
- Name string
- // ParentCheckpoint is the digest of a parent checkpoint
- ParentCheckpoint digest.Digest
- // Options hold runtime specific settings for checkpointing a task
- Options interface{}
- runtime string
- }
- // Runtime name for the container
- func (i *CheckpointTaskInfo) Runtime() string {
- return i.runtime
- }
- // CheckpointTaskOpts allows the caller to set checkpoint options
- type CheckpointTaskOpts func(*CheckpointTaskInfo) error
- // TaskInfo sets options for task creation
- type TaskInfo struct {
- // Checkpoint is the Descriptor for an existing checkpoint that can be used
- // to restore a task's runtime and memory state
- Checkpoint *types.Descriptor
- // RootFS is a list of mounts to use as the task's root filesystem
- RootFS []mount.Mount
- // Options hold runtime specific settings for task creation
- Options interface{}
- // RuntimePath is an absolute path that can be used to overwrite path
- // to a shim runtime binary.
- RuntimePath string
- // runtime is the runtime name for the container, and cannot be changed.
- runtime string
- }
- // Runtime name for the container
- func (i *TaskInfo) Runtime() string {
- return i.runtime
- }
- // Task is the executable object within containerd
- type Task interface {
- Process
- // Pause suspends the execution of the task
- Pause(context.Context) error
- // Resume the execution of the task
- Resume(context.Context) error
- // Exec creates a new process inside the task
- Exec(context.Context, string, *specs.Process, cio.Creator) (Process, error)
- // Pids returns a list of system specific process ids inside the task
- Pids(context.Context) ([]ProcessInfo, error)
- // Checkpoint serializes the runtime and memory information of a task into an
- // OCI Index that can be pushed and pulled from a remote resource.
- //
- // Additional software like CRIU maybe required to checkpoint and restore tasks
- // NOTE: Checkpoint supports to dump task information to a directory, in this way,
- // an empty OCI Index will be returned.
- Checkpoint(context.Context, ...CheckpointTaskOpts) (Image, error)
- // Update modifies executing tasks with updated settings
- Update(context.Context, ...UpdateTaskOpts) error
- // LoadProcess loads a previously created exec'd process
- LoadProcess(context.Context, string, cio.Attach) (Process, error)
- // Metrics returns task metrics for runtime specific metrics
- //
- // The metric types are generic to containerd and change depending on the runtime
- // For the built in Linux runtime, github.com/containerd/cgroups.Metrics
- // are returned in protobuf format
- Metrics(context.Context) (*types.Metric, error)
- // Spec returns the current OCI specification for the task
- Spec(context.Context) (*oci.Spec, error)
- }
- var _ = (Task)(&task{})
- type task struct {
- client *Client
- c Container
- io cio.IO
- id string
- pid uint32
- }
- // Spec returns the current OCI specification for the task
- func (t *task) Spec(ctx context.Context) (*oci.Spec, error) {
- return t.c.Spec(ctx)
- }
- // ID of the task
- func (t *task) ID() string {
- return t.id
- }
- // Pid returns the pid or process id for the task
- func (t *task) Pid() uint32 {
- return t.pid
- }
- func (t *task) Start(ctx context.Context) error {
- r, err := t.client.TaskService().Start(ctx, &tasks.StartRequest{
- ContainerID: t.id,
- })
- if err != nil {
- if t.io != nil {
- t.io.Cancel()
- t.io.Close()
- }
- return errdefs.FromGRPC(err)
- }
- t.pid = r.Pid
- return nil
- }
- func (t *task) Kill(ctx context.Context, s syscall.Signal, opts ...KillOpts) error {
- var i KillInfo
- for _, o := range opts {
- if err := o(ctx, &i); err != nil {
- return err
- }
- }
- _, err := t.client.TaskService().Kill(ctx, &tasks.KillRequest{
- Signal: uint32(s),
- ContainerID: t.id,
- ExecID: i.ExecID,
- All: i.All,
- })
- if err != nil {
- return errdefs.FromGRPC(err)
- }
- return nil
- }
- func (t *task) Pause(ctx context.Context) error {
- _, err := t.client.TaskService().Pause(ctx, &tasks.PauseTaskRequest{
- ContainerID: t.id,
- })
- return errdefs.FromGRPC(err)
- }
- func (t *task) Resume(ctx context.Context) error {
- _, err := t.client.TaskService().Resume(ctx, &tasks.ResumeTaskRequest{
- ContainerID: t.id,
- })
- return errdefs.FromGRPC(err)
- }
- func (t *task) Status(ctx context.Context) (Status, error) {
- r, err := t.client.TaskService().Get(ctx, &tasks.GetRequest{
- ContainerID: t.id,
- })
- if err != nil {
- return Status{}, errdefs.FromGRPC(err)
- }
- return Status{
- Status: ProcessStatus(strings.ToLower(r.Process.Status.String())),
- ExitStatus: r.Process.ExitStatus,
- ExitTime: protobuf.FromTimestamp(r.Process.ExitedAt),
- }, nil
- }
- func (t *task) Wait(ctx context.Context) (<-chan ExitStatus, error) {
- c := make(chan ExitStatus, 1)
- go func() {
- defer close(c)
- r, err := t.client.TaskService().Wait(ctx, &tasks.WaitRequest{
- ContainerID: t.id,
- })
- if err != nil {
- c <- ExitStatus{
- code: UnknownExitStatus,
- err: err,
- }
- return
- }
- c <- ExitStatus{
- code: r.ExitStatus,
- exitedAt: protobuf.FromTimestamp(r.ExitedAt),
- }
- }()
- return c, nil
- }
- // Delete deletes the task and its runtime state
- // it returns the exit status of the task and any errors that were encountered
- // during cleanup
- func (t *task) Delete(ctx context.Context, opts ...ProcessDeleteOpts) (*ExitStatus, error) {
- for _, o := range opts {
- if err := o(ctx, t); err != nil {
- return nil, err
- }
- }
- status, err := t.Status(ctx)
- if err != nil && errdefs.IsNotFound(err) {
- return nil, err
- }
- switch status.Status {
- case Stopped, Unknown, "":
- case Created:
- if t.client.runtime == fmt.Sprintf("%s.%s", plugin.RuntimePlugin, "windows") {
- // On windows Created is akin to Stopped
- break
- }
- if t.pid == 0 {
- // allow for deletion of created tasks with PID 0
- // https://github.com/containerd/containerd/issues/7357
- break
- }
- fallthrough
- default:
- return nil, fmt.Errorf("task must be stopped before deletion: %s: %w", status.Status, errdefs.ErrFailedPrecondition)
- }
- if t.io != nil {
- // io.Wait locks for restored tasks on Windows unless we call
- // io.Close first (https://github.com/containerd/containerd/issues/5621)
- // in other cases, preserve the contract and let IO finish before closing
- if t.client.runtime == fmt.Sprintf("%s.%s", plugin.RuntimePlugin, "windows") {
- t.io.Close()
- }
- // io.Cancel is used to cancel the io goroutine while it is in
- // fifo-opening state. It does not stop the pipes since these
- // should be closed on the shim's side, otherwise we might lose
- // data from the container!
- t.io.Cancel()
- t.io.Wait()
- }
- r, err := t.client.TaskService().Delete(ctx, &tasks.DeleteTaskRequest{
- ContainerID: t.id,
- })
- if err != nil {
- return nil, errdefs.FromGRPC(err)
- }
- // Only cleanup the IO after a successful Delete
- if t.io != nil {
- t.io.Close()
- }
- return &ExitStatus{code: r.ExitStatus, exitedAt: protobuf.FromTimestamp(r.ExitedAt)}, nil
- }
- func (t *task) Exec(ctx context.Context, id string, spec *specs.Process, ioCreate cio.Creator) (_ Process, err error) {
- if id == "" {
- return nil, fmt.Errorf("exec id must not be empty: %w", errdefs.ErrInvalidArgument)
- }
- i, err := ioCreate(id)
- if err != nil {
- return nil, err
- }
- defer func() {
- if err != nil && i != nil {
- i.Cancel()
- i.Close()
- }
- }()
- any, err := protobuf.MarshalAnyToProto(spec)
- if err != nil {
- return nil, err
- }
- cfg := i.Config()
- request := &tasks.ExecProcessRequest{
- ContainerID: t.id,
- ExecID: id,
- Terminal: cfg.Terminal,
- Stdin: cfg.Stdin,
- Stdout: cfg.Stdout,
- Stderr: cfg.Stderr,
- Spec: any,
- }
- if _, err := t.client.TaskService().Exec(ctx, request); err != nil {
- i.Cancel()
- i.Wait()
- i.Close()
- return nil, errdefs.FromGRPC(err)
- }
- return &process{
- id: id,
- task: t,
- io: i,
- }, nil
- }
- func (t *task) Pids(ctx context.Context) ([]ProcessInfo, error) {
- response, err := t.client.TaskService().ListPids(ctx, &tasks.ListPidsRequest{
- ContainerID: t.id,
- })
- if err != nil {
- return nil, errdefs.FromGRPC(err)
- }
- var processList []ProcessInfo
- for _, p := range response.Processes {
- processList = append(processList, ProcessInfo{
- Pid: p.Pid,
- Info: p.Info,
- })
- }
- return processList, nil
- }
- func (t *task) CloseIO(ctx context.Context, opts ...IOCloserOpts) error {
- r := &tasks.CloseIORequest{
- ContainerID: t.id,
- }
- var i IOCloseInfo
- for _, o := range opts {
- o(&i)
- }
- r.Stdin = i.Stdin
- _, err := t.client.TaskService().CloseIO(ctx, r)
- return errdefs.FromGRPC(err)
- }
- func (t *task) IO() cio.IO {
- return t.io
- }
- func (t *task) Resize(ctx context.Context, w, h uint32) error {
- _, err := t.client.TaskService().ResizePty(ctx, &tasks.ResizePtyRequest{
- ContainerID: t.id,
- Width: w,
- Height: h,
- })
- return errdefs.FromGRPC(err)
- }
- // NOTE: Checkpoint supports to dump task information to a directory, in this way, an empty
- // OCI Index will be returned.
- func (t *task) Checkpoint(ctx context.Context, opts ...CheckpointTaskOpts) (Image, error) {
- ctx, done, err := t.client.WithLease(ctx)
- if err != nil {
- return nil, err
- }
- defer done(ctx)
- cr, err := t.client.ContainerService().Get(ctx, t.id)
- if err != nil {
- return nil, err
- }
- request := &tasks.CheckpointTaskRequest{
- ContainerID: t.id,
- }
- i := CheckpointTaskInfo{
- runtime: cr.Runtime.Name,
- }
- for _, o := range opts {
- if err := o(&i); err != nil {
- return nil, err
- }
- }
- // set a default name
- if i.Name == "" {
- i.Name = fmt.Sprintf(checkpointNameFormat, t.id, time.Now().Format(checkpointDateFormat))
- }
- request.ParentCheckpoint = i.ParentCheckpoint.String()
- if i.Options != nil {
- any, err := protobuf.MarshalAnyToProto(i.Options)
- if err != nil {
- return nil, err
- }
- request.Options = any
- }
- status, err := t.Status(ctx)
- if err != nil {
- return nil, err
- }
- if status.Status != Paused {
- // make sure we pause it and resume after all other filesystem operations are completed
- if err := t.Pause(ctx); err != nil {
- return nil, err
- }
- defer t.Resume(ctx)
- }
- index := v1.Index{
- Versioned: is.Versioned{
- SchemaVersion: 2,
- },
- Annotations: make(map[string]string),
- }
- if err := t.checkpointTask(ctx, &index, request); err != nil {
- return nil, err
- }
- // if checkpoint image path passed, jump checkpoint image,
- // return an empty image
- if isCheckpointPathExist(cr.Runtime.Name, i.Options) {
- return NewImage(t.client, images.Image{}), nil
- }
- if cr.Image != "" {
- if err := t.checkpointImage(ctx, &index, cr.Image); err != nil {
- return nil, err
- }
- index.Annotations["image.name"] = cr.Image
- }
- if cr.SnapshotKey != "" {
- if err := t.checkpointRWSnapshot(ctx, &index, cr.Snapshotter, cr.SnapshotKey); err != nil {
- return nil, err
- }
- }
- desc, err := t.writeIndex(ctx, &index)
- if err != nil {
- return nil, err
- }
- im := images.Image{
- Name: i.Name,
- Target: desc,
- Labels: map[string]string{
- "containerd.io/checkpoint": "true",
- },
- }
- if im, err = t.client.ImageService().Create(ctx, im); err != nil {
- return nil, err
- }
- return NewImage(t.client, im), nil
- }
// UpdateTaskInfo holds the settings that can be changed on a task through
// Update.
type UpdateTaskInfo struct {
	// Resources updates a task's resource constraints.
	Resources interface{}
	// Annotations allows arbitrary and/or experimental resource constraints
	// for a task update.
	Annotations map[string]string
}
- // UpdateTaskOpts allows a caller to update task settings
- type UpdateTaskOpts func(context.Context, *Client, *UpdateTaskInfo) error
- func (t *task) Update(ctx context.Context, opts ...UpdateTaskOpts) error {
- request := &tasks.UpdateTaskRequest{
- ContainerID: t.id,
- }
- var i UpdateTaskInfo
- for _, o := range opts {
- if err := o(ctx, t.client, &i); err != nil {
- return err
- }
- }
- if i.Resources != nil {
- any, err := typeurl.MarshalAny(i.Resources)
- if err != nil {
- return err
- }
- request.Resources = protobuf.FromAny(any)
- }
- if i.Annotations != nil {
- request.Annotations = i.Annotations
- }
- _, err := t.client.TaskService().Update(ctx, request)
- return errdefs.FromGRPC(err)
- }
- func (t *task) LoadProcess(ctx context.Context, id string, ioAttach cio.Attach) (Process, error) {
- if id == t.id && ioAttach == nil {
- return t, nil
- }
- response, err := t.client.TaskService().Get(ctx, &tasks.GetRequest{
- ContainerID: t.id,
- ExecID: id,
- })
- if err != nil {
- err = errdefs.FromGRPC(err)
- if errdefs.IsNotFound(err) {
- return nil, fmt.Errorf("no running process found: %w", err)
- }
- return nil, err
- }
- var i cio.IO
- if ioAttach != nil {
- if i, err = attachExistingIO(response, ioAttach); err != nil {
- return nil, err
- }
- }
- return &process{
- id: id,
- task: t,
- io: i,
- }, nil
- }
- func (t *task) Metrics(ctx context.Context) (*types.Metric, error) {
- response, err := t.client.TaskService().Metrics(ctx, &tasks.MetricsRequest{
- Filters: []string{
- "id==" + t.id,
- },
- })
- if err != nil {
- return nil, errdefs.FromGRPC(err)
- }
- if response.Metrics == nil {
- _, err := t.Status(ctx)
- if err != nil && errdefs.IsNotFound(err) {
- return nil, err
- }
- return nil, errors.New("no metrics received")
- }
- return response.Metrics[0], nil
- }
- func (t *task) checkpointTask(ctx context.Context, index *v1.Index, request *tasks.CheckpointTaskRequest) error {
- response, err := t.client.TaskService().Checkpoint(ctx, request)
- if err != nil {
- return errdefs.FromGRPC(err)
- }
- // NOTE: response.Descriptors can be an empty slice if checkpoint image is jumped
- // add the checkpoint descriptors to the index
- for _, d := range response.Descriptors {
- index.Manifests = append(index.Manifests, v1.Descriptor{
- MediaType: d.MediaType,
- Size: d.Size,
- Digest: digest.Digest(d.Digest),
- Platform: &v1.Platform{
- OS: goruntime.GOOS,
- Architecture: goruntime.GOARCH,
- },
- Annotations: d.Annotations,
- })
- }
- return nil
- }
- func (t *task) checkpointRWSnapshot(ctx context.Context, index *v1.Index, snapshotterName string, id string) error {
- opts := []diff.Opt{
- diff.WithReference(fmt.Sprintf("checkpoint-rw-%s", id)),
- }
- rw, err := rootfs.CreateDiff(ctx, id, t.client.SnapshotService(snapshotterName), t.client.DiffService(), opts...)
- if err != nil {
- return err
- }
- rw.Platform = &v1.Platform{
- OS: goruntime.GOOS,
- Architecture: goruntime.GOARCH,
- }
- index.Manifests = append(index.Manifests, rw)
- return nil
- }
- func (t *task) checkpointImage(ctx context.Context, index *v1.Index, image string) error {
- if image == "" {
- return fmt.Errorf("cannot checkpoint image with empty name")
- }
- ir, err := t.client.ImageService().Get(ctx, image)
- if err != nil {
- return err
- }
- index.Manifests = append(index.Manifests, ir.Target)
- return nil
- }
- func (t *task) writeIndex(ctx context.Context, index *v1.Index) (d v1.Descriptor, err error) {
- labels := map[string]string{}
- for i, m := range index.Manifests {
- labels[fmt.Sprintf("containerd.io/gc.ref.content.%d", i)] = m.Digest.String()
- }
- buf := bytes.NewBuffer(nil)
- if err := json.NewEncoder(buf).Encode(index); err != nil {
- return v1.Descriptor{}, err
- }
- return writeContent(ctx, t.client.ContentStore(), v1.MediaTypeImageIndex, t.id, buf, content.WithLabels(labels))
- }
- func writeContent(ctx context.Context, store content.Ingester, mediaType, ref string, r io.Reader, opts ...content.Opt) (d v1.Descriptor, err error) {
- writer, err := store.Writer(ctx, content.WithRef(ref))
- if err != nil {
- return d, err
- }
- defer writer.Close()
- size, err := io.Copy(writer, r)
- if err != nil {
- return d, err
- }
- if err := writer.Commit(ctx, size, "", opts...); err != nil {
- if !errdefs.IsAlreadyExists(err) {
- return d, err
- }
- }
- return v1.Descriptor{
- MediaType: mediaType,
- Digest: writer.Digest(),
- Size: size,
- }, nil
- }
- // isCheckpointPathExist only suitable for runc runtime now
- func isCheckpointPathExist(runtime string, v interface{}) bool {
- if v == nil {
- return false
- }
- switch runtime {
- case plugin.RuntimeRuncV1, plugin.RuntimeRuncV2:
- if opts, ok := v.(*options.CheckpointOptions); ok && opts.ImagePath != "" {
- return true
- }
- case plugin.RuntimeLinuxV1:
- if opts, ok := v.(*runctypes.CheckpointOptions); ok && opts.ImagePath != "" {
- return true
- }
- }
- return false
- }
|