diff --git a/cmd/gitjacker/main.go b/cmd/gitjacker/main.go index 51a47e2..3c2d077 100644 --- a/cmd/gitjacker/main.go +++ b/cmd/gitjacker/main.go @@ -8,6 +8,7 @@ import ( "os" "os/exec" "strings" + "time" "github.com/sirupsen/logrus" @@ -19,11 +20,13 @@ import ( var outputDir string var verbose bool +var wait time.Duration func main() { rootCmd.Flags().BoolVarP(&verbose, "verbose", "v", verbose, "Enable verbose logging") rootCmd.Flags().StringVarP(&outputDir, "output-dir", "o", outputDir, "Directory to output retrieved git repository - defaults to a temporary directory") + rootCmd.Flags().DurationVarP(&wait, "wait", "w", wait, "Wait this long between HTTP requests (to avoid throttling)") if err := rootCmd.Execute(); err != nil { os.Exit(1) @@ -89,7 +92,7 @@ Output Dir: %s _ = tml.Printf("\nGitjacking in progress...") } - summary, err := gitjacker.New(u, outputDir).Run() + summary, err := gitjacker.New(u, outputDir, wait).Run() if err != nil { if !verbose { fmt.Printf("\x1b[2K\r") diff --git a/go.mod b/go.mod index 7682a5c..59c5a8e 100644 --- a/go.mod +++ b/go.mod @@ -9,5 +9,6 @@ require ( github.com/sergi/go-diff v1.1.0 // indirect github.com/sirupsen/logrus v1.2.0 github.com/spf13/cobra v1.0.0 + golang.org/x/time v0.3.0 // indirect gopkg.in/src-d/go-git.v4 v4.13.1 // indirect ) diff --git a/go.sum b/go.sum index 2fba263..179b398 100644 --- a/go.sum +++ b/go.sum @@ -162,6 +162,8 @@ golang.org/x/sys v0.0.0-20190726091711-fc99dfbffb4e/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= +golang.org/x/time v0.3.0 h1:rg5rLMjNzMS1RkNLzCG38eapWhnYLFYXDXj2gOlr8j4= +golang.org/x/time v0.3.0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/tools v0.0.0-20180221164845-07fd8470d635/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= diff --git a/internal/pkg/gitjacker/retriever.go b/internal/pkg/gitjacker/retriever.go index 36d3dbb..9364e37 100644 --- a/internal/pkg/gitjacker/retriever.go +++ b/internal/pkg/gitjacker/retriever.go @@ -1,6 +1,7 @@ package gitjacker import ( + "context" "fmt" "io/ioutil" "net/http" @@ -14,6 +15,7 @@ import ( "crypto/tls" "github.com/sirupsen/logrus" + "golang.org/x/time/rate" ) var paths = []string{ @@ -37,6 +39,7 @@ type retriever struct { baseURL *url.URL outputDir string http *http.Client + limiter *rate.Limiter downloaded map[string]bool summary Summary } @@ -88,7 +91,7 @@ type Branch struct { Remote string } -func New(target *url.URL, outputDir string) *retriever { +func New(target *url.URL, outputDir string, wait time.Duration) *retriever { relative, _ := url.Parse(".git/") target = target.ResolveReference(relative) @@ -103,6 +106,7 @@ func New(target *url.URL, outputDir string) *retriever { Timeout: time.Second * 10, Transport: customTransport, }, + limiter: rate.NewLimiter(rate.Every(wait), 1), downloaded: make(map[string]bool), summary: Summary{ OutputDirectory: outputDir, @@ -172,6 +176,11 @@ func (r *retriever) downloadFile(path string) error { return err } + err = r.limiter.Wait(context.Background()) + if err != nil { + return err + } + absolute := r.baseURL.ResolveReference(relative) resp, err := r.http.Get(absolute.String()) if err != nil { diff --git a/vendor/golang.org/x/time/LICENSE b/vendor/golang.org/x/time/LICENSE new file mode 100644 index 0000000..6a66aea --- /dev/null +++ b/vendor/golang.org/x/time/LICENSE @@ -0,0 +1,27 @@ +Copyright (c) 2009 The Go Authors. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/vendor/golang.org/x/time/PATENTS b/vendor/golang.org/x/time/PATENTS new file mode 100644 index 0000000..7330990 --- /dev/null +++ b/vendor/golang.org/x/time/PATENTS @@ -0,0 +1,22 @@ +Additional IP Rights Grant (Patents) + +"This implementation" means the copyrightable works distributed by +Google as part of the Go project. + +Google hereby grants to You a perpetual, worldwide, non-exclusive, +no-charge, royalty-free, irrevocable (except as stated in this section) +patent license to make, have made, use, offer to sell, sell, import, +transfer and otherwise run, modify and propagate the contents of this +implementation of Go, where such license applies only to those patent +claims, both currently owned or controlled by Google and acquired in +the future, licensable by Google that are necessarily infringed by this +implementation of Go. This grant does not include claims that would be +infringed only as a consequence of further modification of this +implementation. If you or your agent or exclusive licensee institute or +order or agree to the institution of patent litigation against any +entity (including a cross-claim or counterclaim in a lawsuit) alleging +that this implementation of Go or any code incorporated within this +implementation of Go constitutes direct or contributory patent +infringement, or inducement of patent infringement, then any patent +rights granted to you under this License for this implementation of Go +shall terminate as of the date such litigation is filed. diff --git a/vendor/golang.org/x/time/rate/rate.go b/vendor/golang.org/x/time/rate/rate.go new file mode 100644 index 0000000..f0e0cf3 --- /dev/null +++ b/vendor/golang.org/x/time/rate/rate.go @@ -0,0 +1,428 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package rate provides a rate limiter. +package rate + +import ( + "context" + "fmt" + "math" + "sync" + "time" +) + +// Limit defines the maximum frequency of some events. +// Limit is represented as number of events per second. +// A zero Limit allows no events. +type Limit float64 + +// Inf is the infinite rate limit; it allows all events (even if burst is zero). +const Inf = Limit(math.MaxFloat64) + +// Every converts a minimum time interval between events to a Limit. +func Every(interval time.Duration) Limit { + if interval <= 0 { + return Inf + } + return 1 / Limit(interval.Seconds()) +} + +// A Limiter controls how frequently events are allowed to happen. +// It implements a "token bucket" of size b, initially full and refilled +// at rate r tokens per second. +// Informally, in any large enough time interval, the Limiter limits the +// rate to r tokens per second, with a maximum burst size of b events. +// As a special case, if r == Inf (the infinite rate), b is ignored. +// See https://en.wikipedia.org/wiki/Token_bucket for more about token buckets. +// +// The zero value is a valid Limiter, but it will reject all events. +// Use NewLimiter to create non-zero Limiters. +// +// Limiter has three main methods, Allow, Reserve, and Wait. +// Most callers should use Wait. +// +// Each of the three methods consumes a single token. +// They differ in their behavior when no token is available. +// If no token is available, Allow returns false. +// If no token is available, Reserve returns a reservation for a future token +// and the amount of time the caller must wait before using it. +// If no token is available, Wait blocks until one can be obtained +// or its associated context.Context is canceled. +// +// The methods AllowN, ReserveN, and WaitN consume n tokens. +type Limiter struct { + mu sync.Mutex + limit Limit + burst int + tokens float64 + // last is the last time the limiter's tokens field was updated + last time.Time + // lastEvent is the latest time of a rate-limited event (past or future) + lastEvent time.Time +} + +// Limit returns the maximum overall event rate. +func (lim *Limiter) Limit() Limit { + lim.mu.Lock() + defer lim.mu.Unlock() + return lim.limit +} + +// Burst returns the maximum burst size. Burst is the maximum number of tokens +// that can be consumed in a single call to Allow, Reserve, or Wait, so higher +// Burst values allow more events to happen at once. +// A zero Burst allows no events, unless limit == Inf. +func (lim *Limiter) Burst() int { + lim.mu.Lock() + defer lim.mu.Unlock() + return lim.burst +} + +// TokensAt returns the number of tokens available at time t. +func (lim *Limiter) TokensAt(t time.Time) float64 { + lim.mu.Lock() + _, tokens := lim.advance(t) // does not mutate lim + lim.mu.Unlock() + return tokens +} + +// Tokens returns the number of tokens available now. +func (lim *Limiter) Tokens() float64 { + return lim.TokensAt(time.Now()) +} + +// NewLimiter returns a new Limiter that allows events up to rate r and permits +// bursts of at most b tokens. +func NewLimiter(r Limit, b int) *Limiter { + return &Limiter{ + limit: r, + burst: b, + } +} + +// Allow reports whether an event may happen now. +func (lim *Limiter) Allow() bool { + return lim.AllowN(time.Now(), 1) +} + +// AllowN reports whether n events may happen at time t. +// Use this method if you intend to drop / skip events that exceed the rate limit. +// Otherwise use Reserve or Wait. +func (lim *Limiter) AllowN(t time.Time, n int) bool { + return lim.reserveN(t, n, 0).ok +} + +// A Reservation holds information about events that are permitted by a Limiter to happen after a delay. +// A Reservation may be canceled, which may enable the Limiter to permit additional events. +type Reservation struct { + ok bool + lim *Limiter + tokens int + timeToAct time.Time + // This is the Limit at reservation time, it can change later. + limit Limit +} + +// OK returns whether the limiter can provide the requested number of tokens +// within the maximum wait time. If OK is false, Delay returns InfDuration, and +// Cancel does nothing. +func (r *Reservation) OK() bool { + return r.ok +} + +// Delay is shorthand for DelayFrom(time.Now()). +func (r *Reservation) Delay() time.Duration { + return r.DelayFrom(time.Now()) +} + +// InfDuration is the duration returned by Delay when a Reservation is not OK. +const InfDuration = time.Duration(math.MaxInt64) + +// DelayFrom returns the duration for which the reservation holder must wait +// before taking the reserved action. Zero duration means act immediately. +// InfDuration means the limiter cannot grant the tokens requested in this +// Reservation within the maximum wait time. +func (r *Reservation) DelayFrom(t time.Time) time.Duration { + if !r.ok { + return InfDuration + } + delay := r.timeToAct.Sub(t) + if delay < 0 { + return 0 + } + return delay +} + +// Cancel is shorthand for CancelAt(time.Now()). +func (r *Reservation) Cancel() { + r.CancelAt(time.Now()) +} + +// CancelAt indicates that the reservation holder will not perform the reserved action +// and reverses the effects of this Reservation on the rate limit as much as possible, +// considering that other reservations may have already been made. +func (r *Reservation) CancelAt(t time.Time) { + if !r.ok { + return + } + + r.lim.mu.Lock() + defer r.lim.mu.Unlock() + + if r.lim.limit == Inf || r.tokens == 0 || r.timeToAct.Before(t) { + return + } + + // calculate tokens to restore + // The duration between lim.lastEvent and r.timeToAct tells us how many tokens were reserved + // after r was obtained. These tokens should not be restored. + restoreTokens := float64(r.tokens) - r.limit.tokensFromDuration(r.lim.lastEvent.Sub(r.timeToAct)) + if restoreTokens <= 0 { + return + } + // advance time to now + t, tokens := r.lim.advance(t) + // calculate new number of tokens + tokens += restoreTokens + if burst := float64(r.lim.burst); tokens > burst { + tokens = burst + } + // update state + r.lim.last = t + r.lim.tokens = tokens + if r.timeToAct == r.lim.lastEvent { + prevEvent := r.timeToAct.Add(r.limit.durationFromTokens(float64(-r.tokens))) + if !prevEvent.Before(t) { + r.lim.lastEvent = prevEvent + } + } +} + +// Reserve is shorthand for ReserveN(time.Now(), 1). +func (lim *Limiter) Reserve() *Reservation { + return lim.ReserveN(time.Now(), 1) +} + +// ReserveN returns a Reservation that indicates how long the caller must wait before n events happen. +// The Limiter takes this Reservation into account when allowing future events. +// The returned Reservation’s OK() method returns false if n exceeds the Limiter's burst size. +// Usage example: +// +// r := lim.ReserveN(time.Now(), 1) +// if !r.OK() { +// // Not allowed to act! Did you remember to set lim.burst to be > 0 ? +// return +// } +// time.Sleep(r.Delay()) +// Act() +// +// Use this method if you wish to wait and slow down in accordance with the rate limit without dropping events. +// If you need to respect a deadline or cancel the delay, use Wait instead. +// To drop or skip events exceeding rate limit, use Allow instead. +func (lim *Limiter) ReserveN(t time.Time, n int) *Reservation { + r := lim.reserveN(t, n, InfDuration) + return &r +} + +// Wait is shorthand for WaitN(ctx, 1). +func (lim *Limiter) Wait(ctx context.Context) (err error) { + return lim.WaitN(ctx, 1) +} + +// WaitN blocks until lim permits n events to happen. +// It returns an error if n exceeds the Limiter's burst size, the Context is +// canceled, or the expected wait time exceeds the Context's Deadline. +// The burst limit is ignored if the rate limit is Inf. +func (lim *Limiter) WaitN(ctx context.Context, n int) (err error) { + // The test code calls lim.wait with a fake timer generator. + // This is the real timer generator. + newTimer := func(d time.Duration) (<-chan time.Time, func() bool, func()) { + timer := time.NewTimer(d) + return timer.C, timer.Stop, func() {} + } + + return lim.wait(ctx, n, time.Now(), newTimer) +} + +// wait is the internal implementation of WaitN. +func (lim *Limiter) wait(ctx context.Context, n int, t time.Time, newTimer func(d time.Duration) (<-chan time.Time, func() bool, func())) error { + lim.mu.Lock() + burst := lim.burst + limit := lim.limit + lim.mu.Unlock() + + if n > burst && limit != Inf { + return fmt.Errorf("rate: Wait(n=%d) exceeds limiter's burst %d", n, burst) + } + // Check if ctx is already cancelled + select { + case <-ctx.Done(): + return ctx.Err() + default: + } + // Determine wait limit + waitLimit := InfDuration + if deadline, ok := ctx.Deadline(); ok { + waitLimit = deadline.Sub(t) + } + // Reserve + r := lim.reserveN(t, n, waitLimit) + if !r.ok { + return fmt.Errorf("rate: Wait(n=%d) would exceed context deadline", n) + } + // Wait if necessary + delay := r.DelayFrom(t) + if delay == 0 { + return nil + } + ch, stop, advance := newTimer(delay) + defer stop() + advance() // only has an effect when testing + select { + case <-ch: + // We can proceed. + return nil + case <-ctx.Done(): + // Context was canceled before we could proceed. Cancel the + // reservation, which may permit other events to proceed sooner. + r.Cancel() + return ctx.Err() + } +} + +// SetLimit is shorthand for SetLimitAt(time.Now(), newLimit). +func (lim *Limiter) SetLimit(newLimit Limit) { + lim.SetLimitAt(time.Now(), newLimit) +} + +// SetLimitAt sets a new Limit for the limiter. The new Limit, and Burst, may be violated +// or underutilized by those which reserved (using Reserve or Wait) but did not yet act +// before SetLimitAt was called. +func (lim *Limiter) SetLimitAt(t time.Time, newLimit Limit) { + lim.mu.Lock() + defer lim.mu.Unlock() + + t, tokens := lim.advance(t) + + lim.last = t + lim.tokens = tokens + lim.limit = newLimit +} + +// SetBurst is shorthand for SetBurstAt(time.Now(), newBurst). +func (lim *Limiter) SetBurst(newBurst int) { + lim.SetBurstAt(time.Now(), newBurst) +} + +// SetBurstAt sets a new burst size for the limiter. +func (lim *Limiter) SetBurstAt(t time.Time, newBurst int) { + lim.mu.Lock() + defer lim.mu.Unlock() + + t, tokens := lim.advance(t) + + lim.last = t + lim.tokens = tokens + lim.burst = newBurst +} + +// reserveN is a helper method for AllowN, ReserveN, and WaitN. +// maxFutureReserve specifies the maximum reservation wait duration allowed. +// reserveN returns Reservation, not *Reservation, to avoid allocation in AllowN and WaitN. +func (lim *Limiter) reserveN(t time.Time, n int, maxFutureReserve time.Duration) Reservation { + lim.mu.Lock() + defer lim.mu.Unlock() + + if lim.limit == Inf { + return Reservation{ + ok: true, + lim: lim, + tokens: n, + timeToAct: t, + } + } else if lim.limit == 0 { + var ok bool + if lim.burst >= n { + ok = true + lim.burst -= n + } + return Reservation{ + ok: ok, + lim: lim, + tokens: lim.burst, + timeToAct: t, + } + } + + t, tokens := lim.advance(t) + + // Calculate the remaining number of tokens resulting from the request. + tokens -= float64(n) + + // Calculate the wait duration + var waitDuration time.Duration + if tokens < 0 { + waitDuration = lim.limit.durationFromTokens(-tokens) + } + + // Decide result + ok := n <= lim.burst && waitDuration <= maxFutureReserve + + // Prepare reservation + r := Reservation{ + ok: ok, + lim: lim, + limit: lim.limit, + } + if ok { + r.tokens = n + r.timeToAct = t.Add(waitDuration) + + // Update state + lim.last = t + lim.tokens = tokens + lim.lastEvent = r.timeToAct + } + + return r +} + +// advance calculates and returns an updated state for lim resulting from the passage of time. +// lim is not changed. +// advance requires that lim.mu is held. +func (lim *Limiter) advance(t time.Time) (newT time.Time, newTokens float64) { + last := lim.last + if t.Before(last) { + last = t + } + + // Calculate the new number of tokens, due to time that passed. + elapsed := t.Sub(last) + delta := lim.limit.tokensFromDuration(elapsed) + tokens := lim.tokens + delta + if burst := float64(lim.burst); tokens > burst { + tokens = burst + } + return t, tokens +} + +// durationFromTokens is a unit conversion function from the number of tokens to the duration +// of time it takes to accumulate them at a rate of limit tokens per second. +func (limit Limit) durationFromTokens(tokens float64) time.Duration { + if limit <= 0 { + return InfDuration + } + seconds := tokens / float64(limit) + return time.Duration(float64(time.Second) * seconds) +} + +// tokensFromDuration is a unit conversion function from a time duration to the number of tokens +// which could be accumulated during that duration at a rate of limit tokens per second. +func (limit Limit) tokensFromDuration(d time.Duration) float64 { + if limit <= 0 { + return 0 + } + return d.Seconds() * float64(limit) +} diff --git a/vendor/golang.org/x/time/rate/sometimes.go b/vendor/golang.org/x/time/rate/sometimes.go new file mode 100644 index 0000000..6ba99dd --- /dev/null +++ b/vendor/golang.org/x/time/rate/sometimes.go @@ -0,0 +1,67 @@ +// Copyright 2022 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package rate + +import ( + "sync" + "time" +) + +// Sometimes will perform an action occasionally. The First, Every, and +// Interval fields govern the behavior of Do, which performs the action. +// A zero Sometimes value will perform an action exactly once. +// +// # Example: logging with rate limiting +// +// var sometimes = rate.Sometimes{First: 3, Interval: 10*time.Second} +// func Spammy() { +// sometimes.Do(func() { log.Info("here I am!") }) +// } +type Sometimes struct { + First int // if non-zero, the first N calls to Do will run f. + Every int // if non-zero, every Nth call to Do will run f. + Interval time.Duration // if non-zero and Interval has elapsed since f's last run, Do will run f. + + mu sync.Mutex + count int // number of Do calls + last time.Time // last time f was run +} + +// Do runs the function f as allowed by First, Every, and Interval. +// +// The model is a union (not intersection) of filters. The first call to Do +// always runs f. Subsequent calls to Do run f if allowed by First or Every or +// Interval. +// +// A non-zero First:N causes the first N Do(f) calls to run f. +// +// A non-zero Every:M causes every Mth Do(f) call, starting with the first, to +// run f. +// +// A non-zero Interval causes Do(f) to run f if Interval has elapsed since +// Do last ran f. +// +// Specifying multiple filters produces the union of these execution streams. +// For example, specifying both First:N and Every:M causes the first N Do(f) +// calls and every Mth Do(f) call, starting with the first, to run f. See +// Examples for more. +// +// If Do is called multiple times simultaneously, the calls will block and run +// serially. Therefore, Do is intended for lightweight operations. +// +// Because a call to Do may block until f returns, if f causes Do to be called, +// it will deadlock. +func (s *Sometimes) Do(f func()) { + s.mu.Lock() + defer s.mu.Unlock() + if s.count == 0 || + (s.First > 0 && s.count < s.First) || + (s.Every > 0 && s.count%s.Every == 0) || + (s.Interval > 0 && time.Since(s.last) >= s.Interval) { + f() + s.last = time.Now() + } + s.count++ +} diff --git a/vendor/modules.txt b/vendor/modules.txt index 2be9b3a..b667864 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -25,5 +25,8 @@ golang.org/x/crypto/ssh/terminal # golang.org/x/sys v0.0.0-20190726091711-fc99dfbffb4e golang.org/x/sys/unix golang.org/x/sys/windows +# golang.org/x/time v0.3.0 +## explicit +golang.org/x/time/rate # gopkg.in/src-d/go-git.v4 v4.13.1 ## explicit