package webseed

import (
	"bytes"
	"context"
	"errors"
	"fmt"
	"io"
	"log"
	"net/http"
	"strings"

	"github.com/RoaringBitmap/roaring"

	"github.com/anacrolix/torrent/common"
	"github.com/anacrolix/torrent/metainfo"
	"github.com/anacrolix/torrent/segments"
)

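// RequestSpec is the extent of torrent data to fetch, as a byte offset and
// length (a segments.Extent).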
type RequestSpec = segments.Extent

type requestPart struct {
	req *http.Request
	e   segments.Extent
	do  func() (*http.Response, error)
	// Wrap http response bodies for such things as download rate limiting.
	responseBodyWrapper ResponseBodyWrapper
}

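// Request is a handle to an in-flight webseed fetch. Exactly one value is
// delivered on Result, whether the fetch succeeds, fails, or is cancelled.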
type Request struct {
	cancel func()
	Result chan RequestResult
}

func (r Request) Cancel() {
	r.cancel()
}

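// Client fetches torrent data from a single webseed URL, mapping piece-level
// reads onto the torrent's file layout.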
type Client struct {
	HttpClient *http.Client
	Url        string
	fileIndex  segments.Index
	info       *metainfo.Info
	// The pieces we can request with the Url. We're more likely to ban/block at the file level,
	// given that's how requests are mapped to webseeds, but the torrent.Client works at the piece
	// level. We can map our file-level adjustments to the pieces here. This probably needs to be
	// private in the future, if Client ever starts removing pieces.
	Pieces              roaring.Bitmap
	ResponseBodyWrapper ResponseBodyWrapper
	PathEscaper         PathEscaper
}

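// ResponseBodyWrapper wraps HTTP response bodies, for such things as download
// rate limiting. As a minimal sketch, a throttling wrapper might share a
// golang.org/x/time/rate limiter (not a dependency of this package) across
// requests:
//
//	func rateLimited(l *rate.Limiter) ResponseBodyWrapper {
//		return func(r io.Reader) io.Reader {
//			return rateLimitedReader{r, l}
//		}
//	}
//
//	type rateLimitedReader struct {
//		r io.Reader
//		l *rate.Limiter
//	}
//
//	func (me rateLimitedReader) Read(b []byte) (n int, err error) {
//		n, err = me.r.Read(b)
//		if n > 0 {
//			// Block until the limiter grants the bytes just read.
//			if waitErr := me.l.WaitN(context.Background(), n); waitErr != nil && err == nil {
//				err = waitErr
//			}
//		}
//		return
//	}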
type ResponseBodyWrapper func(io.Reader) io.Reader

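// SetInfo supplies the torrent info, building the file segment index and
// marking every piece as available from this webseed. For multi-file torrents
// it's a no-op unless Url ends in a slash, since such URLs can't address
// individual files within the torrent.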
func (me *Client) SetInfo(info *metainfo.Info) {
	if !strings.HasSuffix(me.Url, "/") && info.IsDir() {
		// In my experience, this is a non-conforming webseed. For example the
		// http://ia600500.us.archive.org/1/items URLs in archive.org torrents.
		return
	}
	me.fileIndex = segments.NewIndexFromSegments(common.TorrentOffsetFileSegments(info))
	me.info = info
	me.Pieces.AddRange(0, uint64(info.NumPieces()))
}

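// RequestResult is the outcome of a Request: the requested bytes, or the
// first error encountered.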
type RequestResult struct {
	Bytes []byte
	Err   error
}

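// StartNewRequest maps r onto the torrent's files and issues one HTTP range
// request per file extent, concatenating the response bodies in order. A
// minimal usage sketch (the client and spec values are hypothetical):
//
//	req := client.StartNewRequest(spec)
//	if res := <-req.Result; res.Err != nil {
//		// Retry, back off on ErrTooFast, etc. req.Cancel() aborts early.
//	} else {
//		_ = res.Bytes
//	}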
func (ws *Client) StartNewRequest(r RequestSpec) Request {
	ctx, cancel := context.WithCancel(context.TODO())
	var requestParts []requestPart
	if !ws.fileIndex.Locate(r, func(i int, e segments.Extent) bool {
		req, err := newRequest(
			ctx,
			ws.Url, i, ws.info, e.Start, e.Length,
			ws.PathEscaper,
		)
		if err != nil {
			panic(err)
		}
		part := requestPart{
			req:                 req,
			e:                   e,
			responseBodyWrapper: ws.ResponseBodyWrapper,
		}
		part.do = func() (*http.Response, error) {
			return ws.HttpClient.Do(req)
		}
		requestParts = append(requestParts, part)
		return true
	}) {
		panic("request out of file bounds")
	}
	req := Request{
		cancel: cancel,
		Result: make(chan RequestResult, 1),
	}
	go func() {
		b, err := readRequestPartResponses(ctx, requestParts)
		req.Result <- RequestResult{
			Bytes: b,
			Err:   err,
		}
	}()
	return req
}

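// ErrBadResponse indicates a response that can't be used for the requested
// range, such as an unexpected status code. The offending Response is kept
// for inspection.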
type ErrBadResponse struct {
	Msg      string
	Response *http.Response
}

func (me ErrBadResponse) Error() string {
	return me.Msg
}

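// recvPartResult writes the body for a single part request into buf. A 206
// response is copied through directly; a 200 response for a small file is
// tolerated by discarding the bytes before the requested range, since some
// servers (archive.org, for example) refuse partial content for small files.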
func recvPartResult(ctx context.Context, buf io.Writer, part requestPart, resp *http.Response) error {
	defer resp.Body.Close()
	var body io.Reader = resp.Body
	if part.responseBodyWrapper != nil {
		body = part.responseBodyWrapper(body)
	}
	// Prevent further accidental use.
	resp.Body = nil
	if ctx.Err() != nil {
		return ctx.Err()
	}
	switch resp.StatusCode {
	case http.StatusPartialContent:
		copied, err := io.Copy(buf, body)
		if err != nil {
			return err
		}
		if copied != part.e.Length {
			return fmt.Errorf("got %v bytes, expected %v", copied, part.e.Length)
		}
		return nil
	case http.StatusOK:
		// This number is based on
		// https://archive.org/download/BloodyPitOfHorror/BloodyPitOfHorror.asr.srt. It seems that
		// archive.org might be using a webserver implementation that refuses to do partial
		// responses to small files.
		if part.e.Start < 48<<10 {
			if part.e.Start != 0 {
				log.Printf("resp status ok but requested range [url=%q, range=%q]",
					part.req.URL,
					part.req.Header.Get("Range"))
			}
			// Instead of discarding, we could try receiving all the chunks present in the response
			// body. I don't know how one would handle multiple chunk requests resulting in an OK
			// response for the same file. The request algorithm might need to be smarter for that.
			discarded, _ := io.CopyN(io.Discard, body, part.e.Start)
			if discarded != 0 {
				log.Printf("discarded %v bytes in webseed request response part", discarded)
			}
			_, err := io.CopyN(buf, body, part.e.Length)
			return err
		} else {
			return ErrBadResponse{"resp status ok but requested range", resp}
		}
	case http.StatusServiceUnavailable:
		return ErrTooFast
	default:
		return ErrBadResponse{
			fmt.Sprintf("unhandled response status code (%v)", resp.StatusCode),
			resp,
		}
	}
}

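// ErrTooFast is returned for 503 Service Unavailable responses, on the
// assumption the server is telling us to back off.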
var ErrTooFast = errors.New("making requests too fast")

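// readRequestPartResponses performs each part request in sequence, returning
// the concatenated bodies and wrapping the first error with the failing
// part's URL and Range header.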
func readRequestPartResponses(ctx context.Context, parts []requestPart) (_ []byte, err error) {
	var buf bytes.Buffer
	for _, part := range parts {
		var resp *http.Response
		resp, err = part.do()
		if err == nil {
			err = recvPartResult(ctx, &buf, part, resp)
		}
		if err != nil {
			err = fmt.Errorf("reading %q at %q: %w", part.req.URL, part.req.Header.Get("Range"), err)
			break
		}
	}
	return buf.Bytes(), err
}