Skip to content

Commit b1102c4

Browse files
committed
add lfs_oid and lfs_size
1 parent 857fa2b commit b1102c4

File tree

13 files changed

+176
-57
lines changed

13 files changed

+176
-57
lines changed

modules/git/blob.go

Lines changed: 24 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import (
99
"encoding/base64"
1010
"errors"
1111
"io"
12+
"strings"
1213

1314
"code.gitea.io/gitea/modules/typesniffer"
1415
"code.gitea.io/gitea/modules/util"
@@ -63,33 +64,37 @@ func (b *Blob) GetBlobLineCount(w io.Writer) (int, error) {
6364
}
6465
}
6566

66-
// GetBlobContentBase64 Reads the content of the blob with a base64 encode and returns the encoded string
67-
func (b *Blob) GetBlobContentBase64() (string, error) {
67+
// GetBlobContentBase64 reads the content of the blob and returns it as a base64-encoded string; if originContent is not nil, the raw (unencoded) content is also written to it
68+
func (b *Blob) GetBlobContentBase64(originContent *strings.Builder) (string, error) {
6869
dataRc, err := b.DataAsync()
6970
if err != nil {
7071
return "", err
7172
}
7273
defer dataRc.Close()
7374

74-
pr, pw := io.Pipe()
75-
encoder := base64.NewEncoder(base64.StdEncoding, pw)
76-
77-
go func() {
78-
_, err := io.Copy(encoder, dataRc)
79-
_ = encoder.Close()
80-
81-
if err != nil {
82-
_ = pw.CloseWithError(err)
83-
} else {
84-
_ = pw.Close()
75+
base64buf := &strings.Builder{}
76+
encoder := base64.NewEncoder(base64.StdEncoding, base64buf)
77+
buf := make([]byte, 32*1024)
78+
loop:
79+
for {
80+
n, err := dataRc.Read(buf)
81+
if n > 0 {
82+
if originContent != nil {
83+
_, _ = originContent.Write(buf[:n])
84+
}
85+
if _, err := encoder.Write(buf[:n]); err != nil {
86+
return "", err
87+
}
88+
}
89+
switch {
90+
case errors.Is(err, io.EOF):
91+
break loop
92+
case err != nil:
93+
return "", err
8594
}
86-
}()
87-
88-
out, err := io.ReadAll(pr)
89-
if err != nil {
90-
return "", err
9195
}
92-
return string(out), nil
96+
_ = encoder.Close()
97+
return base64buf.String(), nil
9398
}
9499

95100
// GuessContentType guesses the content type of the blob.

modules/lfs/pointer.go

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -15,15 +15,13 @@ import (
1515
"strings"
1616
)
1717

18+
// spec: https://github.com/git-lfs/git-lfs/blob/master/docs/spec.md
1819
const (
19-
blobSizeCutoff = 1024
20+
MetaFileMaxSize = 1024 // spec says a pointer file must be smaller than 1024 bytes
2021

21-
// MetaFileIdentifier is the string appearing at the first line of LFS pointer files.
22-
// https://github.com/git-lfs/git-lfs/blob/master/docs/spec.md
23-
MetaFileIdentifier = "version https://git-lfs.github.com/spec/v1"
22+
MetaFileIdentifier = "version https://git-lfs.github.com/spec/v1" // the first line of a pointer file
2423

25-
// MetaFileOidPrefix appears in LFS pointer files on a line before the sha256 hash.
26-
MetaFileOidPrefix = "oid sha256:"
24+
MetaFileOidPrefix = "oid sha256:" // spec says the only supported hash is sha256 at the moment
2725
)
2826

2927
var (
@@ -39,7 +37,7 @@ var (
3937

4038
// ReadPointer tries to read LFS pointer data from the reader
4139
func ReadPointer(reader io.Reader) (Pointer, error) {
42-
buf := make([]byte, blobSizeCutoff)
40+
buf := make([]byte, MetaFileMaxSize)
4341
n, err := io.ReadFull(reader, buf)
4442
if err != nil && err != io.ErrUnexpectedEOF {
4543
return Pointer{}, err
@@ -65,6 +63,7 @@ func ReadPointerFromBuffer(buf []byte) (Pointer, error) {
6563
return p, ErrInvalidStructure
6664
}
6765

66+
// spec says "key/value pairs MUST be sorted alphabetically in ascending order (version is exception and must be the first)"
6867
oid := strings.TrimPrefix(splitLines[1], MetaFileOidPrefix)
6968
if len(oid) != 64 || !oidPattern.MatchString(oid) {
7069
return p, ErrInvalidOIDFormat

modules/lfs/pointer_scanner_gogit.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ func SearchPointerBlobs(ctx context.Context, repo *git.Repository, pointerChan c
3131
default:
3232
}
3333

34-
if blob.Size > blobSizeCutoff {
34+
if blob.Size > MetaFileMaxSize {
3535
return nil
3636
}
3737

modules/structs/git_blob.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,4 +10,7 @@ type GitBlobResponse struct {
1010
URL string `json:"url"`
1111
SHA string `json:"sha"`
1212
Size int64 `json:"size"`
13+
14+
LfsOid *string `json:"lfs_oid,omitempty"`
15+
LfsSize *int64 `json:"lfs_size,omitempty"`
1316
}

modules/structs/repo_file.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,9 @@ type ContentsResponse struct {
150150
// `submodule_git_url` is populated when `type` is `submodule`, otherwise null
151151
SubmoduleGitURL *string `json:"submodule_git_url"`
152152
Links *FileLinksResponse `json:"_links"`
153+
154+
LfsOid *string `json:"lfs_oid"`
155+
LfsSize *int64 `json:"lfs_size"`
153156
}
154157

155158
// FileCommitResponse contains information generated from a Git commit for a repo's file.

routers/api/v1/repo/blob.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ func GetBlob(ctx *context.APIContext) {
4747
return
4848
}
4949

50-
if blob, err := files_service.GetBlobBySHA(ctx, ctx.Repo.Repository, ctx.Repo.GitRepo, sha); err != nil {
50+
if blob, err := files_service.GetBlobBySHA(ctx.Repo.Repository, ctx.Repo.GitRepo, sha); err != nil {
5151
ctx.APIError(http.StatusBadRequest, err)
5252
} else {
5353
ctx.JSON(http.StatusOK, blob)

routers/api/v1/repo/file.go

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -908,8 +908,9 @@ func resolveRefCommit(ctx *context.APIContext, ref string, minCommitIDLen ...int
908908
func GetContentsExt(ctx *context.APIContext) {
909909
// swagger:operation GET /repos/{owner}/{repo}/contents-ext/{filepath} repository repoGetContentsExt
910910
// ---
911-
// summary: The extended "contents" API, get file metadata and/or content, or list a directory.
911+
// summary: The extended "contents" API, to get file metadata and/or content, or list a directory.
912912
// description: It guarantees that only one of the response fields is set if the request succeeds.
913+
// Users can pass "includes=file_content" or "includes=lfs_metadata" to retrieve more fields.
913914
// produces:
914915
// - application/json
915916
// parameters:
@@ -930,12 +931,13 @@ func GetContentsExt(ctx *context.APIContext) {
930931
// required: true
931932
// - name: ref
932933
// in: query
933-
// description: "The name of the commit/branch/tag. Default to the repository’s default branch."
934+
// description: the name of the commit/branch/tag, default to the repository’s default branch.
934935
// type: string
935936
// required: false
936937
// - name: includes
937938
// in: query
938-
// description: Set it to "file_content" to retrieve the file content when requesting a file, otherwise the response only contains the file's metadata.
939+
// description: By default this API's response only contains file's metadata. Use comma-separated "includes" options to retrieve more fields.
940+
// Option "file_content" will try to retrieve the file content, option "lfs_metadata" will try to retrieve LFS metadata.
939941
// type: string
940942
// required: false
941943
// responses:
@@ -944,7 +946,6 @@ func GetContentsExt(ctx *context.APIContext) {
944946
// "404":
945947
// "$ref": "#/responses/notFound"
946948

947-
// TODO: add more includes options, like "lfs_content"
948949
opts := files_service.GetContentsOrListOptions{TreePath: ctx.PathParam("*")}
949950
for includeOpt := range strings.SplitSeq(ctx.FormString("includes"), ",") {
950951
if includeOpt == "" {
@@ -953,6 +954,8 @@ func GetContentsExt(ctx *context.APIContext) {
953954
switch includeOpt {
954955
case "file_content":
955956
opts.IncludeSingleFileContent = true
957+
case "lfs_metadata":
958+
opts.IncludeLfsMetadata = true
956959
default:
957960
ctx.APIError(http.StatusBadRequest, fmt.Sprintf("unknown include option %q", includeOpt))
958961
return
@@ -965,7 +968,8 @@ func GetContentsExt(ctx *context.APIContext) {
965968
func GetContents(ctx *context.APIContext) {
966969
// swagger:operation GET /repos/{owner}/{repo}/contents/{filepath} repository repoGetContents
967970
// ---
968-
// summary: Gets the metadata and contents (if a file) of an entry in a repository, or a list of entries if a dir
971+
// summary: Gets the metadata and contents (if a file) of an entry in a repository, or a list of entries if a dir.
972+
// description: This API follows GitHub's design, and it is not easy to use. Recommend to use our "contents-ext" API instead.
969973
// produces:
970974
// - application/json
971975
// parameters:
@@ -1021,7 +1025,8 @@ func getRepoContents(ctx *context.APIContext, opts files_service.GetContentsOrLi
10211025
func GetContentsList(ctx *context.APIContext) {
10221026
// swagger:operation GET /repos/{owner}/{repo}/contents repository repoGetContentsList
10231027
// ---
1024-
// summary: Gets the metadata of all the entries of the root dir
1028+
// summary: Gets the metadata of all the entries of the root dir.
1029+
// description: This API follows GitHub's design, and it is not easy to use. Recommend to use our "contents-ext" API instead.
10251030
// produces:
10261031
// - application/json
10271032
// parameters:

routers/api/v1/repo/wiki.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -499,7 +499,7 @@ func wikiContentsByEntry(ctx *context.APIContext, entry *git.TreeEntry) string {
499499
if blob.Size() > setting.API.DefaultMaxBlobSize {
500500
return ""
501501
}
502-
content, err := blob.GetBlobContentBase64()
502+
content, err := blob.GetBlobContentBase64(nil)
503503
if err != nil {
504504
ctx.APIErrorInternal(err)
505505
return ""

services/repository/files/content.go

Lines changed: 61 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,14 @@ package files
55

66
import (
77
"context"
8+
"io"
89
"net/url"
910
"path"
11+
"strings"
1012

1113
repo_model "code.gitea.io/gitea/models/repo"
1214
"code.gitea.io/gitea/modules/git"
15+
"code.gitea.io/gitea/modules/lfs"
1316
"code.gitea.io/gitea/modules/setting"
1417
api "code.gitea.io/gitea/modules/structs"
1518
"code.gitea.io/gitea/modules/util"
@@ -35,6 +38,7 @@ func (ct *ContentType) String() string {
3538
type GetContentsOrListOptions struct {
3639
TreePath string
3740
IncludeSingleFileContent bool // include the file's content when the tree path is a file
41+
IncludeLfsMetadata bool
3842
}
3943

4044
// GetContentsOrList gets the metadata of a file's contents (*ContentsResponse) if treePath not a tree
@@ -65,9 +69,10 @@ func GetContentsOrList(ctx context.Context, repo *repo_model.Repository, gitRepo
6569
}
6670
ret.DirContents = make([]*api.ContentsResponse, 0, len(entries))
6771
for _, e := range entries {
68-
// never include file content when listing a directory
69-
subTreePath := path.Join(opts.TreePath, e.Name())
70-
fileContentResponse, err := GetFileContents(ctx, repo, gitRepo, refCommit, GetContentsOrListOptions{TreePath: subTreePath, IncludeSingleFileContent: false})
72+
subOpts := opts
73+
subOpts.TreePath = path.Join(opts.TreePath, e.Name())
74+
subOpts.IncludeSingleFileContent = false // never include file content when listing a directory
75+
fileContentResponse, err := GetFileContents(ctx, repo, gitRepo, refCommit, subOpts)
7176
if err != nil {
7277
return ret, err
7378
}
@@ -118,7 +123,7 @@ func GetFileContents(ctx context.Context, repo *repo_model.Repository, gitRepo *
118123
return getFileContentsByEntryInternal(ctx, repo, gitRepo, refCommit, entry, opts)
119124
}
120125

121-
func getFileContentsByEntryInternal(ctx context.Context, repo *repo_model.Repository, gitRepo *git.Repository, refCommit *utils.RefCommit, entry *git.TreeEntry, opts GetContentsOrListOptions) (*api.ContentsResponse, error) {
126+
func getFileContentsByEntryInternal(_ context.Context, repo *repo_model.Repository, gitRepo *git.Repository, refCommit *utils.RefCommit, entry *git.TreeEntry, opts GetContentsOrListOptions) (*api.ContentsResponse, error) {
122127
refType := refCommit.RefName.RefType()
123128
commit := refCommit.Commit
124129
selfURL, err := url.Parse(repo.APIURL() + "/contents/" + util.PathEscapeSegments(opts.TreePath) + "?ref=" + url.QueryEscape(refCommit.InputRef))
@@ -164,12 +169,17 @@ func getFileContentsByEntryInternal(ctx context.Context, repo *repo_model.Reposi
164169
contentsResponse.Type = string(ContentTypeRegular)
165170
// if it is listing the repo root dir, don't waste system resources on reading content
166171
if opts.IncludeSingleFileContent {
167-
blobResponse, err := GetBlobBySHA(ctx, repo, gitRepo, entry.ID.String())
172+
blobResponse, err := GetBlobBySHA(repo, gitRepo, entry.ID.String())
173+
if err != nil {
174+
return nil, err
175+
}
176+
contentsResponse.Encoding, contentsResponse.Content = blobResponse.Encoding, blobResponse.Content
177+
contentsResponse.LfsOid, contentsResponse.LfsSize = blobResponse.LfsOid, blobResponse.LfsSize
178+
} else if opts.IncludeLfsMetadata {
179+
contentsResponse.LfsOid, contentsResponse.LfsSize, err = parsePossibleLfsPointerBlob(gitRepo, entry.ID.String())
168180
if err != nil {
169181
return nil, err
170182
}
171-
contentsResponse.Encoding = blobResponse.Encoding
172-
contentsResponse.Content = blobResponse.Content
173183
}
174184
} else if entry.IsDir() {
175185
contentsResponse.Type = string(ContentTypeDir)
@@ -221,8 +231,7 @@ func getFileContentsByEntryInternal(ctx context.Context, repo *repo_model.Reposi
221231
return contentsResponse, nil
222232
}
223233

224-
// GetBlobBySHA get the GitBlobResponse of a repository using a sha hash.
225-
func GetBlobBySHA(ctx context.Context, repo *repo_model.Repository, gitRepo *git.Repository, sha string) (*api.GitBlobResponse, error) {
234+
func GetBlobBySHA(repo *repo_model.Repository, gitRepo *git.Repository, sha string) (*api.GitBlobResponse, error) {
226235
gitBlob, err := gitRepo.GetBlob(sha)
227236
if err != nil {
228237
return nil, err
@@ -232,12 +241,49 @@ func GetBlobBySHA(ctx context.Context, repo *repo_model.Repository, gitRepo *git
232241
URL: repo.APIURL() + "/git/blobs/" + url.PathEscape(gitBlob.ID.String()),
233242
Size: gitBlob.Size(),
234243
}
235-
if gitBlob.Size() <= setting.API.DefaultMaxBlobSize {
236-
content, err := gitBlob.GetBlobContentBase64()
237-
if err != nil {
238-
return nil, err
239-
}
240-
ret.Encoding, ret.Content = util.ToPointer("base64"), &content
244+
245+
blobSize := gitBlob.Size()
246+
if blobSize > setting.API.DefaultMaxBlobSize {
247+
return ret, nil
248+
}
249+
250+
var originContent *strings.Builder
251+
if 0 < blobSize && blobSize < lfs.MetaFileMaxSize {
252+
originContent = &strings.Builder{}
253+
}
254+
255+
content, err := gitBlob.GetBlobContentBase64(originContent)
256+
if err != nil {
257+
return nil, err
258+
}
259+
260+
ret.Encoding, ret.Content = util.ToPointer("base64"), &content
261+
if originContent != nil {
262+
ret.LfsOid, ret.LfsSize = parsePossibleLfsPointerBuffer(strings.NewReader(originContent.String()))
241263
}
242264
return ret, nil
243265
}
266+
267+
func parsePossibleLfsPointerBuffer(r io.Reader) (*string, *int64) {
268+
p, _ := lfs.ReadPointer(r)
269+
if p.IsValid() {
270+
return &p.Oid, &p.Size
271+
}
272+
return nil, nil
273+
}
274+
275+
func parsePossibleLfsPointerBlob(gitRepo *git.Repository, sha string) (*string, *int64, error) {
276+
gitBlob, err := gitRepo.GetBlob(sha)
277+
if err != nil {
278+
return nil, nil, err
279+
}
280+
if gitBlob.Size() > lfs.MetaFileMaxSize {
281+
return nil, nil, nil // not an LFS pointer
282+
}
283+
buf, err := gitBlob.GetBlobContent(lfs.MetaFileMaxSize)
284+
if err != nil {
285+
return nil, nil, err
286+
}
287+
oid, size := parsePossibleLfsPointerBuffer(strings.NewReader(buf))
288+
return oid, size, nil
289+
}

services/repository/files/content_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ func TestGetContents(t *testing.T) {
107107
sha := "65f1bf27bc3bf70f64657658635e66094edbcb4d"
108108
ctx.SetPathParam("id", "1")
109109
ctx.SetPathParam("sha", sha)
110-
gbr, err := GetBlobBySHA(ctx, ctx.Repo.Repository, ctx.Repo.GitRepo, ctx.PathParam("sha"))
110+
gbr, err := GetBlobBySHA(ctx.Repo.Repository, ctx.Repo.GitRepo, ctx.PathParam("sha"))
111111
expectedGBR := &api.GitBlobResponse{
112112
Content: util.ToPointer("dHJlZSAyYTJmMWQ0NjcwNzI4YTJlMTAwNDllMzQ1YmQ3YTI3NjQ2OGJlYWI2CmF1dGhvciB1c2VyMSA8YWRkcmVzczFAZXhhbXBsZS5jb20+IDE0ODk5NTY0NzkgLTA0MDAKY29tbWl0dGVyIEV0aGFuIEtvZW5pZyA8ZXRoYW50a29lbmlnQGdtYWlsLmNvbT4gMTQ4OTk1NjQ3OSAtMDQwMAoKSW5pdGlhbCBjb21taXQK"),
113113
Encoding: util.ToPointer("base64"),

0 commit comments

Comments
 (0)