Move archive function to repo_model and gitrepo (#35514)

This commit is contained in:
Lunny Xiao
2025-10-06 15:01:26 -07:00
committed by GitHub
parent cdc0733047
commit ad2ff67343
9 changed files with 172 additions and 182 deletions

View File

@@ -33,8 +33,8 @@ import (
// This is entirely opaque to external entities, though, and mostly used as a
// handle elsewhere.
type ArchiveRequest struct {
RepoID int64
Type git.ArchiveType
Repo *repo_model.Repository
Type repo_model.ArchiveType
CommitID string
archiveRefShortName string // the ref short name to download the archive, for example: "master", "v1.0.0", "commit id"
@@ -74,20 +74,20 @@ func (e RepoRefNotFoundError) Is(err error) bool {
// NewRequest creates an archival request, based on the URI. The
// resulting ArchiveRequest is suitable for being passed to Await()
// if it's determined that the request still needs to be satisfied.
func NewRequest(repoID int64, repo *git.Repository, archiveRefExt string) (*ArchiveRequest, error) {
func NewRequest(repo *repo_model.Repository, gitRepo *git.Repository, archiveRefExt string) (*ArchiveRequest, error) {
// here the archiveRefShortName is not a clear ref, it could be a tag, branch or commit id
archiveRefShortName, archiveType := git.SplitArchiveNameType(archiveRefExt)
if archiveType == git.ArchiveUnknown {
archiveRefShortName, archiveType := repo_model.SplitArchiveNameType(archiveRefExt)
if archiveType == repo_model.ArchiveUnknown {
return nil, ErrUnknownArchiveFormat{archiveRefExt}
}
// Get corresponding commit.
commitID, err := repo.ConvertToGitID(archiveRefShortName)
commitID, err := gitRepo.ConvertToGitID(archiveRefShortName)
if err != nil {
return nil, RepoRefNotFoundError{RefShortName: archiveRefShortName}
}
r := &ArchiveRequest{RepoID: repoID, archiveRefShortName: archiveRefShortName, Type: archiveType}
r := &ArchiveRequest{Repo: repo, archiveRefShortName: archiveRefShortName, Type: archiveType}
r.CommitID = commitID.String()
return r, nil
}
@@ -105,7 +105,7 @@ func (aReq *ArchiveRequest) GetArchiveName() string {
// context is cancelled/times out a started archiver will still continue to run
// in the background.
func (aReq *ArchiveRequest) Await(ctx context.Context) (*repo_model.RepoArchiver, error) {
archiver, err := repo_model.GetRepoArchiver(ctx, aReq.RepoID, aReq.Type, aReq.CommitID)
archiver, err := repo_model.GetRepoArchiver(ctx, aReq.Repo.ID, aReq.Type, aReq.CommitID)
if err != nil {
return nil, fmt.Errorf("models.GetRepoArchiver: %w", err)
}
@@ -130,7 +130,7 @@ func (aReq *ArchiveRequest) Await(ctx context.Context) (*repo_model.RepoArchiver
case <-ctx.Done():
return nil, ctx.Err()
case <-poll.C:
archiver, err = repo_model.GetRepoArchiver(ctx, aReq.RepoID, aReq.Type, aReq.CommitID)
archiver, err = repo_model.GetRepoArchiver(ctx, aReq.Repo.ID, aReq.Type, aReq.CommitID)
if err != nil {
return nil, fmt.Errorf("repo_model.GetRepoArchiver: %w", err)
}
@@ -143,17 +143,19 @@ func (aReq *ArchiveRequest) Await(ctx context.Context) (*repo_model.RepoArchiver
// Stream satisfies the ArchiveRequest being passed in. Processing
// will occur directly in this routine.
func (aReq *ArchiveRequest) Stream(ctx context.Context, gitRepo *git.Repository, w io.Writer) error {
if aReq.Type == git.ArchiveBundle {
return gitRepo.CreateBundle(
func (aReq *ArchiveRequest) Stream(ctx context.Context, w io.Writer) error {
if aReq.Type == repo_model.ArchiveBundle {
return gitrepo.CreateBundle(
ctx,
aReq.Repo,
aReq.CommitID,
w,
)
}
return gitRepo.CreateArchive(
return gitrepo.CreateArchive(
ctx,
aReq.Type,
aReq.Repo,
aReq.Type.String(),
w,
setting.Repository.PrefixArchiveFiles,
aReq.CommitID,
@@ -167,10 +169,10 @@ func (aReq *ArchiveRequest) Stream(ctx context.Context, gitRepo *git.Repository,
// being returned for completion, as it may be different than the one they passed
// in.
func doArchive(ctx context.Context, r *ArchiveRequest) (*repo_model.RepoArchiver, error) {
ctx, _, finished := process.GetManager().AddContext(ctx, fmt.Sprintf("ArchiveRequest[%d]: %s", r.RepoID, r.GetArchiveName()))
ctx, _, finished := process.GetManager().AddContext(ctx, fmt.Sprintf("ArchiveRequest[%s]: %s", r.Repo.FullName(), r.GetArchiveName()))
defer finished()
archiver, err := repo_model.GetRepoArchiver(ctx, r.RepoID, r.Type, r.CommitID)
archiver, err := repo_model.GetRepoArchiver(ctx, r.Repo.ID, r.Type, r.CommitID)
if err != nil {
return nil, err
}
@@ -183,7 +185,7 @@ func doArchive(ctx context.Context, r *ArchiveRequest) (*repo_model.RepoArchiver
}
} else {
archiver = &repo_model.RepoArchiver{
RepoID: r.RepoID,
RepoID: r.Repo.ID,
Type: r.Type,
CommitID: r.CommitID,
Status: repo_model.ArchiverGenerating,
@@ -215,28 +217,18 @@ func doArchive(ctx context.Context, r *ArchiveRequest) (*repo_model.RepoArchiver
_ = rd.Close()
}()
done := make(chan error, 1) // Ensure that there is some capacity which will ensure that the goroutine below can always finish
repo, err := repo_model.GetRepositoryByID(ctx, archiver.RepoID)
if err != nil {
return nil, fmt.Errorf("archiver.LoadRepo failed: %w", err)
}
gitRepo, err := gitrepo.OpenRepository(ctx, repo)
if err != nil {
return nil, err
}
defer gitRepo.Close()
go func(done chan error, w *io.PipeWriter, archiveReq *ArchiveRequest, gitRepo *git.Repository) {
go func(done chan error, w *io.PipeWriter, archiveReq *ArchiveRequest) {
defer func() {
if r := recover(); r != nil {
done <- fmt.Errorf("%v", r)
}
}()
err := archiveReq.Stream(ctx, gitRepo, w)
err := archiveReq.Stream(ctx, w)
_ = w.CloseWithError(err)
done <- err
}(done, w, r, gitRepo)
}(done, w, r)
// TODO: add lfs data to zip
// TODO: add submodule data to zip
@@ -347,20 +339,20 @@ func DeleteRepositoryArchives(ctx context.Context) error {
return storage.Clean(storage.RepoArchives)
}
func ServeRepoArchive(ctx *gitea_context.Base, repo *repo_model.Repository, gitRepo *git.Repository, archiveReq *ArchiveRequest) {
func ServeRepoArchive(ctx *gitea_context.Base, archiveReq *ArchiveRequest) {
// Add nix format link header so tarballs lock correctly:
// https://github.com/nixos/nix/blob/56763ff918eb308db23080e560ed2ea3e00c80a7/doc/manual/src/protocols/tarball-fetcher.md
ctx.Resp.Header().Add("Link", fmt.Sprintf(`<%s/archive/%s.%s?rev=%s>; rel="immutable"`,
repo.APIURL(),
archiveReq.Repo.APIURL(),
archiveReq.CommitID,
archiveReq.Type.String(),
archiveReq.CommitID,
))
downloadName := repo.Name + "-" + archiveReq.GetArchiveName()
downloadName := archiveReq.Repo.Name + "-" + archiveReq.GetArchiveName()
if setting.Repository.StreamArchives {
httplib.ServeSetHeaders(ctx.Resp, &httplib.ServeHeaderOptions{Filename: downloadName})
if err := archiveReq.Stream(ctx, gitRepo, ctx.Resp); err != nil && !ctx.Written() {
if err := archiveReq.Stream(ctx, ctx.Resp); err != nil && !ctx.Written() {
log.Error("Archive %v streaming failed: %v", archiveReq, err)
ctx.HTTPError(http.StatusInternalServerError)
}

View File

@@ -29,47 +29,47 @@ func TestArchive_Basic(t *testing.T) {
contexttest.LoadGitRepo(t, ctx)
defer ctx.Repo.GitRepo.Close()
bogusReq, err := NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, firstCommit+".zip")
bogusReq, err := NewRequest(ctx.Repo.Repository, ctx.Repo.GitRepo, firstCommit+".zip")
assert.NoError(t, err)
assert.NotNil(t, bogusReq)
assert.Equal(t, firstCommit+".zip", bogusReq.GetArchiveName())
// Check a series of bogus requests.
// Step 1, valid commit with a bad extension.
bogusReq, err = NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, firstCommit+".unknown")
bogusReq, err = NewRequest(ctx.Repo.Repository, ctx.Repo.GitRepo, firstCommit+".unknown")
assert.Error(t, err)
assert.Nil(t, bogusReq)
// Step 2, missing commit.
bogusReq, err = NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, "dbffff.zip")
bogusReq, err = NewRequest(ctx.Repo.Repository, ctx.Repo.GitRepo, "dbffff.zip")
assert.Error(t, err)
assert.Nil(t, bogusReq)
// Step 3, doesn't look like branch/tag/commit.
bogusReq, err = NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, "db.zip")
bogusReq, err = NewRequest(ctx.Repo.Repository, ctx.Repo.GitRepo, "db.zip")
assert.Error(t, err)
assert.Nil(t, bogusReq)
bogusReq, err = NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, "master.zip")
bogusReq, err = NewRequest(ctx.Repo.Repository, ctx.Repo.GitRepo, "master.zip")
assert.NoError(t, err)
assert.NotNil(t, bogusReq)
assert.Equal(t, "master.zip", bogusReq.GetArchiveName())
bogusReq, err = NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, "test/archive.zip")
bogusReq, err = NewRequest(ctx.Repo.Repository, ctx.Repo.GitRepo, "test/archive.zip")
assert.NoError(t, err)
assert.NotNil(t, bogusReq)
assert.Equal(t, "test-archive.zip", bogusReq.GetArchiveName())
// Now two valid requests, firstCommit with valid extensions.
zipReq, err := NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, firstCommit+".zip")
zipReq, err := NewRequest(ctx.Repo.Repository, ctx.Repo.GitRepo, firstCommit+".zip")
assert.NoError(t, err)
assert.NotNil(t, zipReq)
tgzReq, err := NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, firstCommit+".tar.gz")
tgzReq, err := NewRequest(ctx.Repo.Repository, ctx.Repo.GitRepo, firstCommit+".tar.gz")
assert.NoError(t, err)
assert.NotNil(t, tgzReq)
secondReq, err := NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, secondCommit+".bundle")
secondReq, err := NewRequest(ctx.Repo.Repository, ctx.Repo.GitRepo, secondCommit+".bundle")
assert.NoError(t, err)
assert.NotNil(t, secondReq)
@@ -89,7 +89,7 @@ func TestArchive_Basic(t *testing.T) {
// Sleep two seconds to make sure the queue doesn't change.
time.Sleep(2 * time.Second)
zipReq2, err := NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, firstCommit+".zip")
zipReq2, err := NewRequest(ctx.Repo.Repository, ctx.Repo.GitRepo, firstCommit+".zip")
assert.NoError(t, err)
// This zipReq should match what's sitting in the queue, as we haven't
// let it release yet. From the consumer's point of view, this looks like
@@ -104,12 +104,12 @@ func TestArchive_Basic(t *testing.T) {
// Now we'll submit a request and TimedWaitForCompletion twice, before and
// after we release it. We should trigger both the timeout and non-timeout
// cases.
timedReq, err := NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, secondCommit+".tar.gz")
timedReq, err := NewRequest(ctx.Repo.Repository, ctx.Repo.GitRepo, secondCommit+".tar.gz")
assert.NoError(t, err)
assert.NotNil(t, timedReq)
doArchive(t.Context(), timedReq)
zipReq2, err = NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, firstCommit+".zip")
zipReq2, err = NewRequest(ctx.Repo.Repository, ctx.Repo.GitRepo, firstCommit+".zip")
assert.NoError(t, err)
// Now, we're guaranteed to have released the original zipReq from the queue.
// Ensure that we don't get handed back the released entry somehow, but they