diff --git a/cmd/bundler.go b/cmd/bundler.go
index 7867dd2..2c68c2f 100644
--- a/cmd/bundler.go
+++ b/cmd/bundler.go
@@ -1,16 +1,20 @@
 package cmd
 
 import (
+	"net/http"
+
 	parser_io "github.com/aquasecurity/go-dep-parser/pkg/io"
 	"github.com/aquasecurity/go-dep-parser/pkg/ruby/bundler"
 	"github.com/aquasecurity/go-dep-parser/pkg/types"
 )
 
 type BundlerDoctor struct {
+	HTTPClient http.Client
 }
 
 func NewBundlerDoctor() *BundlerDoctor {
-	return &BundlerDoctor{}
+	client := &http.Client{}
+	return &BundlerDoctor{HTTPClient: *client}
 }
 
 func (d *BundlerDoctor) Deps(r parser_io.ReadSeekerAt) []types.Library {
@@ -21,6 +25,6 @@ func (d *BundlerDoctor) Deps(r parser_io.ReadSeekerAt) []types.Library {
 
 func (d *BundlerDoctor) SourceCodeURL(name string) (string, error) {
 	rubyGems := RubyGems{name: name}
-	url, err := rubyGems.fetchURLFromRegistry()
+	url, err := rubyGems.fetchURLFromRegistry(d.HTTPClient)
 	return url, err
 }
diff --git a/cmd/diagnose.go b/cmd/diagnose.go
index d8f3578..cab84a1 100644
--- a/cmd/diagnose.go
+++ b/cmd/diagnose.go
@@ -6,6 +6,7 @@ import (
 	"log"
 	"os"
 	"strings"
+	"sync"
 
 	"github.com/MakeNowJust/heredoc"
 	"github.com/aquasecurity/go-dep-parser/pkg/io"
@@ -19,6 +20,10 @@ import (
 
 const MAX_YEAR_TO_BE_BLANK = 5
 
+// FETCH_REPOS_PER_ONCE is the maximum number of goroutines allowed to run at the same time.
+// TODO: tune this value.
+const FETCH_REPOS_PER_ONCE = 20
+
 type Diagnosis struct {
 	Name      string
 	Url       string
@@ -35,33 +40,53 @@ type MedicalTechnician interface {
 func FetchRepositoryParams(libs []types.Library, g MedicalTechnician) []github.FetchRepositoryParam {
 	var params []github.FetchRepositoryParam
 
+	maxConcurrency := FETCH_REPOS_PER_ONCE
+	var wg sync.WaitGroup
+	// mu guards params, which the goroutines below append to concurrently.
+	var mu sync.Mutex
+	sem := make(chan struct{}, maxConcurrency)
+
 	for _, lib := range libs {
-		fmt.Printf("%s\n", lib.Name)
+		wg.Add(1)
+		sem <- struct{}{}
+		go func(lib types.Library) {
+			defer wg.Done()
+			defer func() { <-sem }()
 
-		githubUrl, err := g.SourceCodeURL(lib.Name)
-		if err != nil {
-			continue
-		}
+			fmt.Printf("%s\n", lib.Name)
+
+			githubUrl, err := g.SourceCodeURL(lib.Name)
+			if err != nil {
+				return
+			}
+
+			repo, err := github.ParseGitHubUrl(githubUrl)
+			if err != nil {
+				mu.Lock()
+				params = append(params,
+					github.FetchRepositoryParam{
+						PackageName: lib.Name,
+						CanSearch:   false,
+					},
+				)
+				mu.Unlock()
+				return
+			}
 
-		repo, err := github.ParseGitHubUrl(githubUrl)
-		if err != nil {
+			mu.Lock()
 			params = append(params,
 				github.FetchRepositoryParam{
+					Repo:        repo.Repo,
+					Owner:       repo.Owner,
 					PackageName: lib.Name,
-					CanSearch:   false,
+					CanSearch:   true,
 				},
 			)
-			continue
-		}
+			mu.Unlock()
+		}(lib)
+	}
 
-		params = append(params,
-			github.FetchRepositoryParam{
-				Repo:        repo.Repo,
-				Owner:       repo.Owner,
-				PackageName: lib.Name,
-				CanSearch:   true,
-			},
-		)
-	}
+	// Wait once, after all goroutines are started; waiting inside the loop ran them one at a time.
+	wg.Wait()
 
 	return params
@@ -82,22 +107,38 @@ func Diagnose(d MedicalTechnician, r io.ReadSeekCloserAt, year int, ignores []st
 		slicedParams = append(slicedParams, fetchRepositoryParams[i:end])
 	}
 
+	maxConcurrency := FETCH_REPOS_PER_ONCE
+	var wg sync.WaitGroup
+	// mu guards diagnoses: unsynchronized concurrent map writes abort the program.
+	var mu sync.Mutex
+	sem := make(chan struct{}, maxConcurrency)
 	for _, param := range slicedParams {
-		repos := github.FetchFromGitHub(param)
-		for _, r := range repos {
-			isIgnore := slices.Contains(ignores, r.Name)
-			diagnosis := Diagnosis{
-				Name:      r.Name,
-				Url:       r.Url,
-				Archived:  r.Archived,
-				Ignored:   isIgnore,
-				Diagnosed: true,
-				IsActive:  r.IsActive(year),
+		wg.Add(1)
+		sem <- struct{}{}
+		go func(param []github.FetchRepositoryParam) {
+			defer wg.Done()
+			defer func() { <-sem }()
+
+			repos := github.FetchFromGitHub(param)
+			for _, r := range repos {
+				isIgnore := slices.Contains(ignores, r.Name)
+				diagnosis := Diagnosis{
+					Name:      r.Name,
+					Url:       r.Url,
+					Archived:  r.Archived,
+					Ignored:   isIgnore,
+					Diagnosed: true,
+					IsActive:  r.IsActive(year),
+				}
+				mu.Lock()
+				diagnoses[r.Name] = diagnosis
+				mu.Unlock()
 			}
-			diagnoses[r.Name] = diagnosis
-		}
+		}(param)
 	}
 
+	wg.Wait()
+
 	for _, fetchRepositoryParam := range fetchRepositoryParams {
 		if fetchRepositoryParam.CanSearch {
 			continue
diff --git a/cmd/nodejs.go b/cmd/nodejs.go
index 90c034f..58f4d0f 100644
--- a/cmd/nodejs.go
+++ b/cmd/nodejs.go
@@ -2,6 +2,7 @@ package cmd
 
 import (
 	"encoding/json"
+	"errors"
 	"fmt"
 	"io"
 	"net/http"
@@ -20,15 +21,28 @@ type Nodejs struct {
 	name string
 }
 
-func (n *Nodejs) fetchURLFromRegistry() (string, error) {
+func (n *Nodejs) fetchURLFromRegistry(client http.Client) (string, error) {
 	url := fmt.Sprintf(NODEJS_REGISTRY_API, n.name)
-	req, _ := http.NewRequest(http.MethodGet, url, nil)
-	client := new(http.Client)
-	resp, _ := client.Do(req)
+	req, err := http.NewRequest(http.MethodGet, url, nil)
+	if err != nil {
+		return "", err
+	}
+
+	resp, err := client.Do(req)
+	if err != nil {
+		return "", err
+	}
+
+	defer resp.Body.Close()
+	if resp.StatusCode < 200 || 299 < resp.StatusCode {
+		m := fmt.Sprintf("Got status code: %d from %s", resp.StatusCode, url)
+		return "", errors.New(m)
+	}
+
 	body, _ := io.ReadAll(resp.Body)
 
 	var NodejsRegistryResponse NodejsRegistryResponse
-	err := json.Unmarshal(body, &NodejsRegistryResponse)
+	err = json.Unmarshal(body, &NodejsRegistryResponse)
 	if err != nil {
 		return "", nil
 	}
diff --git a/cmd/nodejs_test.go b/cmd/nodejs_test.go
index e834e8a..7ae2672 100644
--- a/cmd/nodejs_test.go
+++ b/cmd/nodejs_test.go
@@ -1,6 +1,7 @@
 package cmd
 
 import (
+	"net/http"
 	"strings"
 	"testing"
 
@@ -30,7 +31,7 @@ func TestNodejs_fetchURLFromRegistry(t *testing.T) {
 	for i, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
 			n := Nodejs{name: tt.dep_name}
-			r, _ := n.fetchURLFromRegistry()
+			r, _ := n.fetchURLFromRegistry(http.Client{})
 			expect := expects[i]
 			assert.Equal(t, true, strings.HasPrefix(r, expect.url))
 		})
diff --git a/cmd/npm.go b/cmd/npm.go
index 97d23de..938d58d 100644
--- a/cmd/npm.go
+++ b/cmd/npm.go
@@ -1,12 +1,15 @@
 package cmd
 
 import (
+	"net/http"
+
 	parser_io "github.com/aquasecurity/go-dep-parser/pkg/io"
 	"github.com/aquasecurity/go-dep-parser/pkg/nodejs/npm"
 	"github.com/aquasecurity/go-dep-parser/pkg/types"
 )
 
 type NPMDoctor struct {
+	HTTPClient http.Client
 }
 
 func NewNPMDoctor() *NPMDoctor {
@@ -21,6 +24,6 @@ func (d *NPMDoctor) Deps(r parser_io.ReadSeekerAt) []types.Library {
 
 func (d *NPMDoctor) SourceCodeURL(name string) (string, error) {
 	nodejs := Nodejs{name: name}
-	url, err := nodejs.fetchURLFromRegistry()
+	url, err := nodejs.fetchURLFromRegistry(d.HTTPClient)
 	return url, err
 }
diff --git a/cmd/pip.go b/cmd/pip.go
index 9545660..c5368b2 100644
--- a/cmd/pip.go
+++ b/cmd/pip.go
@@ -1,16 +1,20 @@
 package cmd
 
 import (
+	"net/http"
+
 	parser_io "github.com/aquasecurity/go-dep-parser/pkg/io"
 	"github.com/aquasecurity/go-dep-parser/pkg/python/pip"
 	"github.com/aquasecurity/go-dep-parser/pkg/types"
 )
 
 type PipDoctor struct {
+	HTTPClient http.Client
 }
 
 func NewPipDoctor() *PipDoctor {
-	return &PipDoctor{}
+	client := &http.Client{}
+	return &PipDoctor{HTTPClient: *client}
 }
 
 func (d *PipDoctor) Deps(r parser_io.ReadSeekerAt) []types.Library {
@@ -21,6 +25,6 @@ func (d *PipDoctor) Deps(r parser_io.ReadSeekerAt) []types.Library {
 
 func (d *PipDoctor) SourceCodeURL(name string) (string, error) {
 	pypi := Pypi{name: name}
-	url, err := pypi.fetchURLFromRepository()
+	url, err := pypi.fetchURLFromRegistry(d.HTTPClient)
 	return url, err
 }
diff --git a/cmd/pypi.go b/cmd/pypi.go
index 259b8b1..967d740 100644
--- a/cmd/pypi.go
+++ b/cmd/pypi.go
@@ -2,6 +2,7 @@ package cmd
 
 import (
 	"encoding/json"
+	"errors"
 	"fmt"
 	"io"
 	"net/http"
@@ -23,15 +24,28 @@ type Pypi struct {
 	name string
 }
 
-func (p *Pypi) fetchURLFromRepository() (string, error) {
+func (p *Pypi) fetchURLFromRegistry(client http.Client) (string, error) {
 	url := fmt.Sprintf(PYPI_REGISTRY_API, p.name)
-	req, _ := http.NewRequest(http.MethodGet, url, nil)
-	client := new(http.Client)
-	resp, _ := client.Do(req)
+	req, err := http.NewRequest(http.MethodGet, url, nil)
+	if err != nil {
+		return "", err
+	}
+
+	resp, err := client.Do(req)
+	if err != nil {
+		return "", err
+	}
+
+	defer resp.Body.Close()
+	if resp.StatusCode < 200 || 299 < resp.StatusCode {
+		m := fmt.Sprintf("Got status code: %d from %s", resp.StatusCode, url)
+		return "", errors.New(m)
+	}
+
 	body, _ := io.ReadAll(resp.Body)
 
 	var PypiRegistryResponse PypiRegistryResponse
-	err := json.Unmarshal(body, &PypiRegistryResponse)
+	err = json.Unmarshal(body, &PypiRegistryResponse)
 	if err != nil {
 		return "", nil
 	}
diff --git a/cmd/pypi_test.go b/cmd/pypi_test.go
index 1af3b45..d2f140b 100644
--- a/cmd/pypi_test.go
+++ b/cmd/pypi_test.go
@@ -1,6 +1,7 @@
 package cmd
 
 import (
+	"net/http"
 	"testing"
 
 	"github.com/stretchr/testify/assert"
@@ -29,7 +30,7 @@ func TestPyPi_fetchURLFromRegistry(t *testing.T) {
 	for i, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
 			p := Pypi{name: tt.dep_name}
-			r, _ := p.fetchURLFromRepository()
+			r, _ := p.fetchURLFromRegistry(http.Client{})
 			expect := expects[i]
 			assert.Equal(t, expect.url, r)
 		})
diff --git a/cmd/ruby_gems.go b/cmd/ruby_gems.go
index 91224dd..0783296 100644
--- a/cmd/ruby_gems.go
+++ b/cmd/ruby_gems.go
@@ -21,17 +21,30 @@ type RubyGems struct {
 	name string
 }
 
-func (g *RubyGems) fetchURLFromRegistry() (string, error) {
+func (g *RubyGems) fetchURLFromRegistry(client http.Client) (string, error) {
 	url := fmt.Sprintf(RUBY_GEMS_REGISTRY_API, g.name)
-	req, _ := http.NewRequest(http.MethodGet, url, nil)
-	client := new(http.Client)
-	resp, _ := client.Do(req)
+	req, err := http.NewRequest(http.MethodGet, url, nil)
+	if err != nil {
+		return "", err
+	}
+
+	resp, err := client.Do(req)
+	if err != nil {
+		return "", err
+	}
+
+	defer resp.Body.Close()
+	if resp.StatusCode < 200 || 299 < resp.StatusCode {
+		m := fmt.Sprintf("Got status code: %d from %s", resp.StatusCode, url)
+		return "", errors.New(m)
+	}
+
 	body, _ := io.ReadAll(resp.Body)
 
 	var Gem RubyGemsRegistryResponse
-	err := json.Unmarshal(body, &Gem)
+	err = json.Unmarshal(body, &Gem)
 	if err != nil {
-		return "", errors.New("error: Unknown response")
+		return "", err
 	}
 
 	if Gem.SourceCodeUri != "" {
diff --git a/cmd/ruby_gems_test.go b/cmd/ruby_gems_test.go
index ac73edd..c5b8554 100644
--- a/cmd/ruby_gems_test.go
+++ b/cmd/ruby_gems_test.go
@@ -1,13 +1,14 @@
 package cmd
 
 import (
+	"net/http"
 	"strings"
 	"testing"
 
 	"github.com/stretchr/testify/assert"
 )
 
-func TestRubyGems_fetchURLFromRepository(t *testing.T) {
+func TestRubyGems_fetchURLFromRegistry(t *testing.T) {
 	tests := []struct {
 		name     string
 		gem_name string
@@ -38,7 +39,7 @@ func TestRubyGems_fetchURLFromRepository(t *testing.T) {
 	for i, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
 			g := RubyGems{name: tt.gem_name}
-			r, _ := g.fetchURLFromRegistry()
+			r, _ := g.fetchURLFromRegistry(http.Client{})
 			expect := expects[i]
 			assert.Equal(t, true, strings.HasPrefix(r, expect.url))
 		})
diff --git a/cmd/yarn.go b/cmd/yarn.go
index 8a9aa9a..00ab437 100644
--- a/cmd/yarn.go
+++ b/cmd/yarn.go
@@ -1,16 +1,20 @@
 package cmd
 
 import (
+	"net/http"
+
 	parser_io "github.com/aquasecurity/go-dep-parser/pkg/io"
 	"github.com/aquasecurity/go-dep-parser/pkg/nodejs/yarn"
 	"github.com/aquasecurity/go-dep-parser/pkg/types"
 )
 
 type YarnDoctor struct {
+	HTTPClient http.Client
 }
 
 func NewYarnDoctor() *YarnDoctor {
-	return &YarnDoctor{}
+	client := &http.Client{}
+	return &YarnDoctor{HTTPClient: *client}
 }
 
 func (d *YarnDoctor) Deps(r parser_io.ReadSeekerAt) []types.Library {
@@ -21,6 +25,6 @@ func (d *YarnDoctor) Deps(r parser_io.ReadSeekerAt) []types.Library {
 
 func (d *YarnDoctor) SourceCodeURL(name string) (string, error) {
 	nodejs := Nodejs{name: name}
-	url, err := nodejs.fetchURLFromRegistry()
+	url, err := nodejs.fetchURLFromRegistry(d.HTTPClient)
 	return url, err
 }