From 9d6ebe7ccf761a239a2aea50cde8a680f71a5967 Mon Sep 17 00:00:00 2001 From: Germano Eichenberg Date: Mon, 28 Nov 2022 03:09:30 -0300 Subject: [PATCH] Remove request aborting references from code & readme --- CONFIG.md | 5 ----- README.md | 23 ++++++++--------------- lib/queue.go | 29 ++--------------------------- lib/queue_manager.go | 7 ++----- main.go | 13 ++++++------- 5 files changed, 18 insertions(+), 59 deletions(-) diff --git a/CONFIG.md b/CONFIG.md index 41dae82..6ac18d4 100644 --- a/CONFIG.md +++ b/CONFIG.md @@ -65,11 +65,6 @@ Allows you to define custom global request limits for one or multiple bots. The Format: Command separated list of **user id** and limit combo, separated by `:` and with no spaces at all. Don't use application ids. Example: `392827169497284619:100,227115752396685313:80` -##### RATELIMIT_ABORT_AFTER -Amount of seconds a request should wait for ratelimits befor aborting. `-1` means the request will never abort and `0` means the request will abort in the event of any form of ratelimiting. The value may be larger than a single ratelimit window. - -Default: -1 (no aborting) - ## Unstable env vars Collection of env vars that may be removed at any time, mainly used for Discord introducing new behaviour on their edge api versions diff --git a/README.md b/README.md index 0af185c..919bb7d 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,7 @@ # Nirn-proxy -Nirn-proxy is a highly available, transparent & dynamic HTTP proxy that handles Discord ratelimits for you and exports meaningful prometheus metrics. It is considered beta software but is being used in production by [Dyno](https://dyno.gg) on the scale of hundreds of requests per second. +Nirn-proxy is a highly available, transparent & dynamic HTTP proxy that +handles Discord ratelimits for you and exports meaningful prometheus metrics. 
+This project is at the heart of [Dyno](https://dyno.gg), handling several hundred requests per second across hundreds of bots all while keeping 429s at ~100 per hour. It is designed to be minimally invasive and exploits common library patterns to make the adoption as simple as a URL change. @@ -7,18 +9,18 @@ It is designed to be minimally invasive and exploits common library patterns to - Highly available, horizontally scalable - Transparent ratelimit handling, per-route and global -- Multi-bot support with automatic detection for elevated REST limits (big bot sharding) - Works with any API version (Also supports using two or more versions for the same bot) - Small resource footprint - Works with webhooks - Works with Bearer tokens +- Supports an unlimited number of clients (Bots and Bearer) - Prometheus metrics exported out of the box - No hardcoded routes, therefore no need of updates for new routes introduced by Discord ### Usage Binaries can be found [here](https://github.com/germanoeich/nirn-proxy/releases). Docker images can be found [here](https://github.com/germanoeich/nirn-proxy/pkgs/container/nirn-proxy) -The proxy sits between the client and discord. Essentially, instead of pointing to discord.com, you point to whatever IP and port the proxy is running on, so discord.com/api/v9/gateway becomes 10.0.0.1:8080/api/v9/gateway. This can be achieved in many ways, some suggestions are host remapping on the OS level, DNS overrides or changes to the library code. Please note that the proxy currently does not support SSL. +The proxy sits between the client and discord. Instead of pointing to discord.com, you point to whatever IP and port the proxy is running on, so discord.com/api/v9/gateway becomes 10.0.0.1:8080/api/v9/gateway. This can be achieved in many ways, some suggestions are host remapping on the OS level, DNS overrides or changes to the library code. Please note that the proxy currently does not support SSL. 
Configuration options are @@ -39,7 +41,6 @@ Configuration options are | MAX_BEARER_COUNT| number | 1024 | | DISABLE_HTTP_2 | bool | true | | BOT_RATELIMIT_OVERRIDES | string list (comma separated) | "" | -| RATELIMIT_ABORT_AFTER | number | -1 | Information on each config var can be found [here](https://github.com/germanoeich/nirn-proxy/blob/main/CONFIG.md) @@ -47,22 +48,12 @@ Information on each config var can be found [here](https://github.com/germanoeic ### Behaviour -The proxy listens on all routes and relays them to Discord, while keeping track of ratelimit buckets and holding requests if there are no tokens to spare. The proxy fires requests sequentially for each bucket and ordering is preserved. The proxy does not modify the requests in any way so any library compatible with Discords API can be pointed at the proxy and it will not break the library, even with the libraries own ratelimiting intact. +The proxy listens on all routes and relays them to Discord, while keeping track of ratelimit buckets and making requests wait if there are no tokens to spare. The proxy fires requests sequentially for each bucket and ordering is preserved. The proxy does not modify the requests in any way so any library compatible with Discord's API can be pointed at the proxy and it will not break the library, even with the library's own ratelimiting intact. When using the proxy, it is safe to remove the ratelimiting logic from clients and fire requests instantly, however, the proxy does not handle retries. If for some reason (i.e shared ratelimits, internal discord ratelimits, etc) the proxy encounters a 429, it will return that to the client. It is safe to immediately retry requests that return 429 or even setup retry logic elsewhere (like in a load balancer or service mesh). The proxy also guards against known scenarios that might cause a cloudflare ban, like too many webhook 404s or too many 401s. 
-#### Ratelimit aborting - -The proxy allows requests to specify an `X-RateLimit-Abort-After` header (defaulted to the `RATELIMIT_ABORT_AFTER` variable). This sets the amount of seconds to wait in case of ratelimits before the proxy aborts the request and returns a 408 response. - -The point of ratelimit aborting is being able to send a request and set a maximum amount of time the request can be ratelimited. Certain enpoints have very high ratelimits and this configuration allows you to send the request and tell the proxy to abort it in case it needs to wait for ratelimits. Compared to timeouts, this is a much more reliable approach in the event of instabilities of the API. - -The special (and default) value `-1` indicates a request which should not abort. Set the value to `0` to abort if any ratelimiting will be necessary. If the value is higher than the allowed window of the ratelimit - for example an abort time of `8` for a ratelimit of `5 / 5s` - the value will be subtracted each time the proxy waits for the ratelimit. - -The proxy does not pre-emptively calculate how long a request will need to wait for ratelimits, therefore requests may not always immediately abort. In the above example with 8 seconds of abort time, the request will be aborted after roughly 5 seconds when the proxy fills the second window of the ratelimit and the request would have to wait for 10 seconds in total had it not been aborted. - ### Proxy specific responses The proxy may return a 408 Request Timeout if Discord takes more than $REQUEST_TIMEOUT milliseconds to respond. This allows you to identify and react to routes that have issues. @@ -73,6 +64,8 @@ Requests may also return a 408 status code in the event that they were aborted b The ratelimiting only works with `X-RateLimit-Precision` set to `seconds`. If you are using Discord API v8+, that is the only possible behaviour. 
For users on v6 or v7, please refer to your library docs for information on which precision it uses and how to change it to seconds. +The proxy tries its best to detect your REST global limits, but Discord does not expose this information. Be sure to set `BOT_RATELIMIT_OVERRIDES` for any clients with elevated limits. + ### High availability The proxy can be run in a cluster by setting either `CLUSTER_MEMBERS` or `CLUSTER_DNS` env vars. When in cluster mode, all nodes are a suitable gateway for all requests and the proxy will route requests consistently using the bucket hash. diff --git a/lib/queue.go b/lib/queue.go index 11def9d..a55a936 100644 --- a/lib/queue.go +++ b/lib/queue.go @@ -19,8 +19,6 @@ type QueueItem struct { Res *http.ResponseWriter doneChan chan *http.Response errChan chan error - // -1 means no abort - abortTime int } type QueueChannel struct { @@ -157,32 +155,19 @@ func safeSend(queue *QueueChannel, value *QueueItem) { queue.ch <- value } -func (q *RequestQueue) Queue(req *http.Request, res *http.ResponseWriter, path string, pathHash uint64, defaultAbort int) error { +func (q *RequestQueue) Queue(req *http.Request, res *http.ResponseWriter, path string, pathHash uint64) error { logger.WithFields(logrus.Fields{ "bucket": path, "path": req.URL.Path, "method": req.Method, }).Trace("Inbound request") - var abort int - abortHeader := req.Header.Get("X-RateLimit-Abort-After") - if abortHeader != "" { - valParsed, err := strconv.ParseInt(abortHeader, 10, 64) - if err != nil { - return err - } - - abort = int(valParsed) - } else { - abort = defaultAbort - } - ch := q.getQueueChannel(path, pathHash) doneChan := make(chan *http.Response) errChan := make(chan error) - safeSend(ch, &QueueItem{req, res, doneChan, errChan, abort}) + safeSend(ch, &QueueItem{req, res, doneChan, errChan}) select { case <-doneChan: @@ -286,16 +271,6 @@ func return401(item *QueueItem) { item.doneChan <- nil } -func generate408Aborted(resp *http.ResponseWriter) error { - res := *resp 
- - res.Header().Set("Generated-By-Proxy", "true") - res.WriteHeader(408) - - _, err := res.Write([]byte("{\n \"message\": \"Request aborted because of ratelimits\",\n \"code\": 0\n}")) - return err -} - func isInteraction(url string) bool { parts := strings.Split(strings.SplitN(url, "?", 1)[0], "/") for _, p := range parts { diff --git a/lib/queue_manager.go b/lib/queue_manager.go index a605b74..b366d91 100644 --- a/lib/queue_manager.go +++ b/lib/queue_manager.go @@ -39,8 +39,6 @@ type QueueManager struct { clusterGlobalRateLimiter *ClusterGlobalRateLimiter orderedClusterMembers []string nameToAddressMap map[string]string - // -1 means no abort - abortTime int localNodeName string localNodeIP string localNodeProxyListenAddr string @@ -50,7 +48,7 @@ func onEvictLruItem(key interface{}, value interface{}) { go value.(*RequestQueue).destroy() } -func NewQueueManager(bufferSize int, maxBearerLruSize int, abortTime int) *QueueManager { +func NewQueueManager(bufferSize int, maxBearerLruSize int) *QueueManager { bearerMap, err := lru.NewWithEvict(maxBearerLruSize, onEvictLruItem) if err != nil { @@ -61,7 +59,6 @@ func NewQueueManager(bufferSize int, maxBearerLruSize int, abortTime int) *Queue queues: make(map[string]*RequestQueue), bearerQueues: bearerMap, bufferSize: bufferSize, - abortTime: abortTime, cluster: nil, clusterGlobalRateLimiter: NewClusterGlobalRateLimiter(), } @@ -328,7 +325,7 @@ func (m *QueueManager) fulfillRequest(resp *http.ResponseWriter, req *http.Reque } } } - err = q.Queue(req, resp, path, pathHash, m.abortTime) + err = q.Queue(req, resp, path, pathHash) if err != nil { log := logEntry.WithField("function", "Queue") if errors.Is(err, context.DeadlineExceeded) || errors.Is(err, context.Canceled) { diff --git a/main.go b/main.go index 320c3b5..d159da1 100644 --- a/main.go +++ b/main.go @@ -16,6 +16,7 @@ import ( ) var logger = logrus.New() + // token : queue map var bufferSize = 50 @@ -64,7 +65,7 @@ func initCluster(proxyPort string, manager 
*lib.QueueManager) *memberlist.Member return lib.InitMemberList(members, port, proxyPort, manager) } -func main() { +func main() { outboundIp := os.Getenv("OUTBOUND_IP") timeout := lib.EnvGetInt("REQUEST_TIMEOUT", 5000) @@ -73,7 +74,7 @@ func main() { globalOverrides := lib.EnvGet("BOT_RATELIMIT_OVERRIDES", "") - lib.ConfigureDiscordHTTPClient(outboundIp, time.Duration(timeout) * time.Millisecond, disableHttp2, globalOverrides) + lib.ConfigureDiscordHTTPClient(outboundIp, time.Duration(timeout)*time.Millisecond, disableHttp2, globalOverrides) port := lib.EnvGet("PORT", "8080") bindIp := lib.EnvGet("BIND_IP", "0.0.0.0") @@ -82,9 +83,8 @@ func main() { bufferSize = lib.EnvGetInt("BUFFER_SIZE", 50) maxBearerLruSize := lib.EnvGetInt("MAX_BEARER_COUNT", 1024) - abort := lib.EnvGetInt("RATELIMIT_ABORT_AFTER", -1) - manager := lib.NewQueueManager(bufferSize, maxBearerLruSize, abort) + manager := lib.NewQueueManager(bufferSize, maxBearerLruSize) mux := manager.CreateMux() @@ -105,7 +105,6 @@ func main() { go lib.StartMetrics(bindIp + ":" + port) } - done := make(chan os.Signal, 1) signal.Notify(done, os.Interrupt, syscall.SIGINT, syscall.SIGTERM) @@ -118,7 +117,7 @@ func main() { logger.Info("Started proxy on " + bindIp + ":" + port) // Wait for the http server to ready before joining the cluster - <- time.After(1 * time.Second) + <-time.After(1 * time.Second) initCluster(port, manager) <-done @@ -137,4 +136,4 @@ func main() { } logger.Info("Bye bye") -} \ No newline at end of file +}